#! /bin/awk -f
#
######################################################################
#
#  pdb2cif.oawk
#
#  produced from pdb2cif.m4
#  version 2.3.7 07 Mar 1999
#
#  a m4 macro program which produces pdb2cif.pl, pdb2cif.awk, pdb2cif.oawk
#
#  Scripts to filter a PDB entry and produce a CIF file.
#
#  Phil Bourne (bourne@sdsc.edu)
#
#  adapted to 6 Oct 95 cifdic.m95 0.7.28
#  and later to 1 Jan 97 cif_mm.dic 0.9.01
#
#  by
#  Herbert J. Bernstein
#  Bernstein+Sons, P.O. Box 177, Bellport, NY 11713
#  phone: 1-516-286-1339, email: yaya@bernstein-plus-sons.com
#  and
#  Frances C. Bernstein
#  Bernstein+Sons, P.O. Box 177, Bellport, NY 11713
#  phone: 1-516-286-1339, email: fcb@bernstein-plus-sons.com
#
#  This work was supported in part by IUCr (for HJB), US NSF, PHS, NIH,
#  NCRR, NIGMS, NLM and DOE (for FCB prior to 1998), US NSF grant no.
#  See H. Bernstein, F. Bernstein, P. E. Bourne "CIF Applications. VIII.
#  pdb2cif: Translating PDB Entries into mmCIF Format", J. Appl. Cryst.,
#  31, pp. 282-295, 1998.
#
#**************************************************************************
#  THE CONVERSION FROM PDB FORMAT TO CIF FORMAT IS COMPLEX
#  *******              USE WITH CAUTION              *******
#  COMMENTS AND SUGGESTIONS APPRECIATED
#  If you like the basic approach, thank Phil Bourne.  He did
#  the real work of creating pdb2cif.
#  If you have problems with
#  the adaptation to cif_mm.dic, tell yaya@bernstein-plus-sons.com
#
###########################################################################
#**************************************************************************
#
#  This version available via http from:
#
#    http://www.bernstein-plus-sons.com/software/pdb2cif
#    http://www.iucr.org/iucr-top/cif/software/pdb2cif
#      and the mirror sites of the IUCr
#    http://www.sdsc.edu/pb/pdb2cif/pdb2cif
#    http://ndbserver.rutgers.edu/NDB/mmcif/software/pdb2cif
#      and the mirror sites of the NDB
#
#  See the file README for instructions on use and installation
#
###########################################################################
#
#  Version History:
#    See the file CHANGES
#
#######################################################################
#
# BEGIN: print the banner and initialise every global used by the
# per-record rules that follow (option flags, counters, lookup tables).
# Nothing is read from the input here.
#
BEGIN {
  comma = ","
  lcaz="abcdefghijklmnopqrstuvwxyz"
  UCAZ="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  version = "2.3.7"
  version_date = "07 Mar 1999"
  printf ("\n")
  printf ("###################################################\n")
  printf ("# #\n")
  printf ("# Converted from PDB format to CIF format by #\n")
  printf ("# pdb2cif version %-15s %11s #\n",version,version_date)
  printf ("# by #\n")
  printf ("# P.E. Bourne, H.J. Bernstein and F.C. Bernstein #\n")
  printf ("# #\n")
  printf ("# http://www.bernstein-plus-sons.com/software #\n")
  printf ("# /pdb2cif #\n")
  printf ("# *** See the remarks at the end of this *** #\n")
  printf ("# *** file for information on conversion *** #\n")
  printf ("# *** of this entry and on the program *** #\n")
  printf ("# *** pdb2cif *** #\n")
  my_at = "@"
  printf ("# Please report problems to: #\n")
  printf(("# pdb2cif" my_at "bernstein-plus-sons.com #\n"))
  printf ("###################################################\n\n\n")
  #
  # Set starting variables
  #
  # The following flag is used to produce a more complete CIF entry,
  # i.e. data items are given, but with the value "?".
  # If you desire only the minimum set of data items comment out the
  # following one line:
  #
  verbose = "yes"
  #
  # The following flag controls conversion of text fields using
  # the type-setting codes used in some PDB entries
  #
  convtext = "yes"
  #
  # The following flag controls conversion of author and editor
  # names, "yes" to always convert according to the 1992 format
  # description, "conditional" to be controlled by convtext
  auth_convtext = "yes"
  #
  # uncomment the next line if convtext control of typesetting desired
  # auth_convtext = "conditional"
  #
  # The following flag controls the distribution of entity_seq_num
  # to all atom site lines, uncomment if you do _not_ want
  # this distribution done, but want denser atom lists
  # dense_list = "yes"
  #
  #
  # The following flag controls the handling of Junior, Senior, etc
  # As of this writing, the mmCIF convention is to keep dynastic
  # indicators with the last name
  #
  junior_on_last = "yes"
  #
  # The following flag controls the printing of TER records
  # The possible values are "yes" to print them, "no" to
  # suppress them, or "comment" to print them as comments
  #
  print_ter = "comment"
  #
  # each of these variables may be reset from within the text
  # with a line of the form
  #
  #    #define variable value
  #
  # e.g. #define verbose yes
  #   or #define convtext no
  #
  # In addition, to allow control of translation of author names
  # the variable "name" may be defined multiple times
  # with two values, in the form
  #
  #    #define name PDB_form name_value
  #
  # where the PDB_form is the form of the name expected in the PDB
  # and name_value is the form to be used by this program.  All blanks
  # in either form must be replaced by "_".  For example, you can
  # give the following
  #
  #    #define name E.F.MEYER_JUNIOR Meyer_Junior,_E.F.
  #
  # If the same name is defined multiple times, only the last
  # translation given will be used.  The PDB_form is not case-sensitive,
  # but the name_value is.
  #
  # Normally, the compliance_level for a PDB dataset should be
  # obtained from REMARK 4.  However, to facilitate processing
  # of pseudo-pdb datasets from non-PDB sources, the compliance
  # level may be set by
  #
  #    #define compliance_level level
  #
  # where 2.0 is the only meaningful case of level at this time
  #
  # If REMARK 4 contains a compliance level, that will apply in
  # parsing from that point onwards
  aniso_flag = 0
  atom_alt_flag = 1
  atom_flag = 1
  atom_flag_1 = 1
  atom_flag_2 = 1
  atom_res_flag = 0
  audit_flag = 0
  author_flag = 1
  bcid = "*"                  # character to use for blank chain id
  cispep_flag = 0
  cit_flag = 0                # 1 is the primary citation
  compnd_flag = 1
  compliance_level = 0.0
  connect_flag = 0
  conect_flag = 1
  conect_flag_2 = 0
  conect_id = 1
  dbref_flag = 0              # count of dbref records
  end_flag = 0
  entity_flag = 0
  entity_mon_flag = 1
  ent_non_poly_point[" "] = ""
  ent_non_poly_num[" "] = 0
  ent_poly_point[" "] = ""
  ent_poly_num[" "] = 0
  entity_seq_num_flag = 0
  flush_ref = 0
  for (x = 1; x <= 999; ++x) {
    ftnote_flag[x] = 0
  }
  ftnote_flag_old = 1
  foot_flag = 0
  formul_flag = 1
  head_PDB_code = "."
  helix_flag = 1
  het_flag = 1
  hetnam_flag = 1
  hetsyn_flag = 1
  hydbnd_flag = 0
  jrnl_flag = 1
  keywrd_flag = 0
  link_flag = 0
  mon_flag = 1
  model_flag = "."
  model_flags = "no"
  modres_flag = 0
  mtrix_flag = 0
  nmr_flag = 0
  nonp_flag = 1
  num_non_poly_ents = 0
  num_poly_ents = 0
  num_res_name = 0
  num_res_pair = 0
  origx_flag = 0
  record_number = " "
  remark_flag = 0
  remark_header_flag = 0
  revdat_flag = 1
  res_flag = 0
  res_res_flag = 0
  scale_flag = 0
  seqadv_flag = 0             # count of seqadv records
  seqres_flag = 1
  sheet_flag = 0
  sheet_flag_2 = 1
  sigatm_flag = 0
  siguij_flag = 0
  site_flag = 1
  sltbrg_flag = 0
  s_o_flag = 0
  ss_flag = 1                 # tracks HELIX, TURN and SHEET
  ss_flag_2 = 1
  ssbond_flag = 1
  ter_flag = 0
  turn_flag = 1
  turn_flag_2 = 1
  tvect_flag = 0
  vol_flag = 0
  warning_flag = 0            # count of warnings in warning list
  xlat_flag = 0
  xlat_save = 0
  flag = 0                    # flags to correctly set ;
  previous_keyword = " "      # prior to new keyword
  remark_number = 0
  remark_number_old = 0
  all_remarks = 0
  #
  # set up connect types
  #
  num_ctypes = split(\
    ". . . . hydrog hydrog saltbr hydrog hydrog saltbr",\
    connect_types," ")
  #
  # set up conversion strings for residues
  #
  numl="0123456789"
  charl=lcaz
  charu=UCAZ
  chars="+_*/!#$,.;:?|{}()"
  charx=(charl charu numl chars)
  # Define date format conversion arrays
  #
  #   mmm2mm[month_name] = month_ordinal
  #   yyyy[2_digit_year] = 4_digit_year
  #
  mmm2mm["JAN"] = "01"
  mmm2mm["FEB"] = "02"
  mmm2mm["MAR"] = "03"
  mmm2mm["APR"] = "04"
  mmm2mm["MAY"] = "05"
  mmm2mm["JUN"] = "06"
  mmm2mm["JUL"] = "07"
  mmm2mm["AUG"] = "08"
  mmm2mm["SEP"] = "09"
  mmm2mm["OCT"] = "10"
  mmm2mm["NOV"] = "11"
  mmm2mm["DEC"] = "12"
  #
  # Two-digit years 70..99 are taken as 19yy and 00..69 as 20yy.
  # (The increment must be 100; adding 1000 to 19yy produced 29yy.)
  #
  for (yy=0; yy < 100; ++yy) {
    yyyy[yy+0] = yy+1900
    if ( yy < 70 ) yyyy[yy+0] += 100
  }
  #
  # Define lists of amino acids and nucleic acids
  num_aa = split(("ABU ACD ALA ALB ALI ARG ARO ASN ASP ASX" \
    " BAS CYS GLN GLU GLX GLY HIS HYP ILE LEU" \
    " LYS MET PCA PHE PRO SER THR TRP TYR VAL"),\
    aa_list," ")
  num_na = split(("A +A C +C G +G I +I T +T U +U"),na_list," ")
  #
  # Formulae and Molecular Weights for Standard Residues from
  # the 1992 PDB format Description
  #   AARESES has the Amino Acids and Miscellaneous Residues
  #   NARESES has the Nucleotides
  #
  #   Name; Code;Formula;Mol. Wt.
  #
  # Only fields 1-3 are read below; the weight is carried as data.
  # The ASX formula carries O3.5 (mean of ASP and ASN), which is what
  # its listed weight 132.61 corresponds to.
  # NOTE(review): the Formic Acid weight 40.03 does not match C1 H2 O2
  # (about 46.03) -- left as found, confirm against the format description.
  #
  num_AARESES = split((\
    "Alanine;ALA;C3 H7 N1 O2;89.09|" \
    "Arginine;ARG;C6 H14 N4 O2;174.20|" \
    "Asparagine;ASN;C4 H8 N2 O3;132.12|" \
    "Aspartic acid;ASP;C4 H7 N1 O4;133.10|" \
    "ASP/ASN ambiguous;ASX;C4 H7.5 N1.5 O3.5;132.61|" \
    "Cysteine;CYS;C3 H7 N1 O2 S1;121.15|" \
    "Glutamine;GLN;C5 H10 N2 O3;146.15|" \
    "Glutamic acid;GLU;C5 H9 N1 O4;147.13|" \
    "GLU/GLN ambiguous;GLX;C5 H9.5 N1.5 O3.5;146.64|" \
    "Glycine;GLY;C2 H5 N1 O2;75.07|" \
    "Histidine;HIS;C6 H9 N3 O2;155.16|" \
    "Isoleucine;ILE;C6 H13 N1 O2;131.17|" \
    "Leucine;LEU;C6 H13 N1 O2;131.17|" \
    "Lysine;LYS;C6 H14 N2 O2;146.19|" \
    "Methionine;MET;C5 H11 N1 O2 S1;149.21|" \
    "Phenylalanine;PHE;C9 H11 N1 O2;165.19|" \
    "Proline;PRO;C5 H9 N1 O2;115.13|" \
    "Serine;SER;C3 H7 N1 O3;105.09|" \
    "Threonine;THR;C4 H9 N1 O3;119.12|" \
    "Tryptophan;TRP;C11 H12 N2 O2;204.23|" \
    "Tyrosine;TYR;C9 H11 N1 O3;181.19|" \
    "Valine;VAL;C5 H11 N1 O2;117.15|" \
    "Undetermined;UNK;C5 H6 N1 O3;128.16|" \
    "Acetic Acid;ACE;C2 H4 O2;60.05|" \
    "Formic Acid;FOR;C1 H2 O2;40.03|" \
    "Water;HOH;H2 O1;18.015"),AARESES_list,"|")
  for (naa = 1; naa <= num_AARESES; ++naa){
    nxx = split(AARESES_list[naa],naa_split,";")
    # Residue names are looked up with the 3-column field taken from
    # ATOM records, so force every key to width 3, right-justified.
    res_code = sprintf("%3s", naa_split[2])
    res_id[naa] = res_code
    res_count[res_code] = 0
    res_name[res_code] = naa_split[1]
    res_formul[res_code] = naa_split[3]
  }
  # No "|" after the last entry: a trailing separator makes split()
  # return one extra, empty residue.
  num_NARESES = split((\
    "Adenosine; A;C10 H14 N5 O7 P1;347.22|" \
    "Modified Adenosine; +A;.;347.22|" \
    "1-Methyladenosine;1MA;C11 H16 N5 O7 P1;361.25|" \
    "Cytidine; C;C9 H14 N3 O8 P1;323.20|" \
    "Modified Cytidine; +C;.;323.20|" \
    "5-Methylcytidine;5MC;C10 H16 N3 O8 P1;337.23|" \
    "2'-O-Methylcytidine;OMC;C10 H17 N3 O8 P1;338.23|" \
    "Guanosine; G;C10 H14 N5 O8 P1;363.22|" \
    "Modified Guanosine; +G;.;363.22|" \
    "1-Methylguanosine;1MG;C11 H16 N5 O8 P1;377.25|" \
    "N2-Methylguanosine;2MG;C11 H16 N5 O8 P1;377.25|" \
    "N2-Dimethylguanosine;M2G;C12 H18 N5 O8 P1;391.28|" \
    "7-Methylguanosine;7MG;C11 H10 N5 O8 P1;377.25|" \
    "2'-O-Methylguanosine;OMG;C11 H16 N5 O8 P1;377.25|" \
    "Wybutosine; YG;C21 H26 N6 O11 P1;587.48|" \
    "Inosine; I;C10 H13 N4 O8 P1;348.21|" \
    "Modified Inosine; +I;.;348.21|" \
    "Thymidine; T;C10 H15 N2 O8 P1;322.21|" \
    "Modified Thymidine; +T;.;322.21|" \
    "Uridine; U;C9 H13 N2 O9 P1;324.18|" \
    "Modified Uridine; +U;.;324.18|" \
    "Dihydrouridine;H2U;C9 H15 N2 O9 P1;326.20|" \
    "Ribosylthymidine;5MU;C10 H16 N2 O10 P1;355.22|" \
    "Pseudouridine;PSU;C9 H13 N2 O9 P1;324.18"),NARESES_list,"|")
  for (nna = 1; nna <= num_NARESES; ++nna){
    nxx = split(NARESES_list[nna],nna_split,";")
    res_code = sprintf("%3s", nna_split[2])
    res_id[num_AARESES+nna] = res_code
    res_count[res_code] = 0
    res_name[res_code] = nna_split[1]
    res_formul[res_code] = nna_split[3]
  }
  num_OTRESES = 0
  #
  # Element Lists to check atom types
  #
  num_per_tab = split(( " . D " \
    "H HE " \
    "LI BE B C N O F NE " \
    "NA MG AL SI P S CL AR " \
    "K CA SC TI V CR MN FE CO NI CU ZN GA GE AS SE BR KR " \
    "RB SR Y ZR NB MO TC RU RH PD AG CD IN SN SB TE I XE " \
    "CS BA " \
    " LA CE PR ND PM SM EU GD TB DY HO ER TM YB LU "\
    " HF TA W RE OS IR PT AU HG TL PB BI PO AT RN " \
    "FR RA " \
    " AC TH PA U NP PU AM CM BK CF ES FM MD NO LR " \
    " KU HA SG NS HS" \
    " DB JL RF BH HN MT"),\
    periodic_table," ")
  num_aa_na_el = split((". C H D N O P S"),\
    standard_res_elements," ")
  num_one_let_el = split((". B C H D N O F P S K V Y I W"),\
    one_letter_elements," ")
  #
  # Define special name suffixes to move away from family names
  #
  num_suffix = split(\
    ("JUNIOR SENIOR JR SR JR. SR." \
    " I II III IV V VI VII VIII IX X XI XII"),\
    suffix_list," ")
  xnum_suffix = split(\
    ("Junior Senior Junior Senior Junior Senior" \
    " I II III IV V VI VII VIII IX X XI XII"),\
    rep_suffix_list," ")
  for (i = 1; i <= xnum_suffix; ++i) {
    rep_suffix[suffix_list[i]] = rep_suffix_list[i]
  }
  #
  # Setup charge conversions: 2-character PDB charge field -> CIF form
  #
  # No Charge
  charge["0 "] = " "
  charge["00"] = " "
  charge[" "] = " "
  # 1+
  charge["1+"] = "1+"
  charge["+1"] = "1+"
  charge["I "] = "1+"
  charge["i "] = "1+"
  charge["1 "] = "1+"
  charge["+ "] = "1+"
  # 1-
  charge["1-"] = "1-"
  charge["-1"] = "1-"
  charge["- "] = "1-"
  # 2+
  charge["2+"] = "2+"
  charge["+2"] = "2+"
  charge["II"] = "2+"
  charge["ii"] = "2+"
  charge["2 "] = "2+"
  charge["++"] = "2+"
  # 2-
  charge["2-"] = "2-"
  charge["-2"] = "2-"
  charge["--"] = "2-"
  # 3+/3- through 9+/9-: a bare digit followed by a blank is positive.
  # Generated in a loop so that no digit can be given two meanings
  # (the hand-written table mapped "5 " to both 5+ and 5-).
  for (chg_digit = 3; chg_digit <= 9; ++chg_digit) {
    charge[(chg_digit "+")] = (chg_digit "+")
    charge[("+" chg_digit)] = (chg_digit "+")
    charge[(chg_digit " ")] = (chg_digit "+")
    charge[(chg_digit "-")] = (chg_digit "-")
    charge[("-" chg_digit)] = (chg_digit "-")
  }
} # End of BEGIN statement
#
#
# Flag all lines as untranslated unless proven otherwise.
#
# Process #define (or #def)
#
# Determine whether this is a new keyword, if so and flag is set
# terminate free text with a ;  Also discard noise lines less than
# 6 characters long, and pad other lines to 80 characters with blanks
#
# Ensure that the record name used is separated from following info
#
# This rule runs for every input record.  While end_flag is 0 it
#   - records the raw line in non_xlated[] (later rules that recognise
#     the record restore xlat_flag from xlat_save to un-record it),
#   - applies "#define variable value" and "#define name PDB_form value",
#   - pads records longer than 5 characters to 80 columns,
#   - sets first_field (at most 6 characters) and previous_keyword, and
#     closes an open "; " text block when the keyword changes.
# Once end_flag is set the record is blanked instead.
#
# Case folding is done character by character with lcaz/UCAZ and is
# repeated inline, presumably because the old-awk target has neither
# tolower/toupper nor user-defined functions.
#
{
  if (end_flag == 0) {
    xlat_save = xlat_flag
    non_xlated[++xlat_flag] = $0
    first_field = $1
    if (NF > 1 && ($1 == "#def" || $1 == "#define")) {
      # var_name = lower-cased $2
      lx_tl = length($2)
      tx_tl = $2
      var_name = ""
      for (ix_tl = 1; ix_tl <= lx_tl; ++ix_tl) {
        cx_tl = substr(tx_tl,ix_tl,1)
        cx_tl = substr((lcaz cx_tl), \
                       index((UCAZ cx_tl), cx_tl),1)
        var_name = (var_name cx_tl)
      }
      # var_value = lower-cased $3, if present
      var_value = ""
      if (NF > 2) {
        lx_tl = length($3)
        tx_tl = $3
        var_value = ""
        for (ix_tl = 1; ix_tl <= lx_tl; ++ix_tl) {
          cx_tl = substr(tx_tl,ix_tl,1)
          cx_tl = substr((lcaz cx_tl), \
                         index((UCAZ cx_tl), cx_tl),1)
          var_value = (var_value cx_tl)
        }
      }
      # A recognised setting with a legal value is consumed: restoring
      # xlat_flag drops the line from the untranslated list.
      if (var_name == "verbose" && \
          (var_value == "yes" || var_value == "no")) {
        verbose = var_value
        xlat_flag = xlat_save
      } else if (var_name == "convtext" && \
          (var_value == "yes" || var_value == "no")) {
        convtext = var_value
        xlat_flag = xlat_save
      } else if (var_name == "auth_convtext" && \
          (var_value == "yes" || var_value == "no" || \
           var_value == "conditional")) {
        auth_convtext = var_value
        xlat_flag = xlat_save
      } else if (var_name == "dense_list" && \
          (var_value == "yes" || var_value == "no")) {
        dense_list = var_value
        xlat_flag = xlat_save
      } else if (var_name == "junior_on_last" && \
          (var_value == "yes" || var_value == "no")) {
        junior_on_last = var_value
        xlat_flag = xlat_save
      } else if (var_name == "print_ter" && \
          (var_value == "yes" || var_value == "no" || \
           var_value == "comment")) {
        print_ter = var_value
        xlat_flag = xlat_save
      } else if (var_name == "compliance_level") {
        compliance_level = var_value
        xlat_flag = xlat_save
      }
      if (NF > 3 && var_name == "name") {
        # "_" stands for a blank in both forms
        ll = length($3)
        PDB_form = ""
        for (i = 1; i <=ll; ++i) {
          cc = substr($3,i,1)
          if (cc=="_") cc = " "
          PDB_form = (PDB_form cc)
        }
        ll = length($4)
        name_value = ""
        for (i = 1; i <=ll; ++i) {
          cc = substr($4,i,1)
          if (cc=="_") cc = " "
          name_value = (name_value cc)
        }
        #
        # apply PDB typesetting codes if any to a line
        #
        lx_tl = length(PDB_form)
        tx_tl = PDB_form
        lostr = ""
        for (ix_tl = 1; ix_tl <= lx_tl; ++ix_tl) {
          cx_tl = substr(tx_tl,ix_tl,1)
          cx_tl = substr((lcaz cx_tl), \
                         index((UCAZ cx_tl), cx_tl),1)
          lostr = (lostr cx_tl)
        }
        lstr = length(lostr)
        mystr = ""
        pchar = " "
        for( qtsi=1; qtsi <= lstr; ++qtsi) {
          mychar = substr( lostr, qtsi, 1)
          # capitalise after a break character
          if( pchar == " " || \
              pchar == "," || \
              pchar == "." || \
              pchar == "(" || \
              pchar == "*" || \
              pchar == "/" ) {
            mychar = substr((UCAZ mychar),\
                            index((lcaz mychar), mychar),1)
          }
          # "*", "$" and "/" are codes: kept only when doubled
          if( ( mychar != "*" && mychar != "$" && mychar != "/" ) \
              || (mychar == pchar) ) mystr = (mystr mychar)
          if( pchar == "/" ){
            if( mychar == "$" || mychar == "-" ) {
              pchar = mychar
            }
          } else pchar = mychar
        }
        ret_val=mystr
        PDB_form=ret_val
        # the lookup key is stored in upper case
        lx_tu = length(PDB_form)
        tx_tu = PDB_form
        PDB_form = ""
        for (ix_tu = 1; ix_tu <= lx_tu; ++ix_tu) {
          cx_tu = substr(tx_tu,ix_tu,1)
          cx_tu = substr((UCAZ cx_tu), \
                         index((lcaz cx_tu), cx_tu),1)
          PDB_form = (PDB_form cx_tu)
        }
        rep_name[PDB_form] = name_value
        xlat_flag = xlat_save
      }
    }
    if (length($0) > 5) {
      # Pad to the fixed 80-column record width.  sprintf is used so
      # the result does not depend on the width of a blank literal.
      if (length($0) < 80) {
        $0 = sprintf("%-80s", $0)
      }
      if (length(first_field) > 6 ) first_field = substr($1,1,6)
      if (first_field != previous_keyword && flag != "0") {
        printf ("; \n\n")
        flag = "0"
        previous_keyword = first_field
      } else {
        previous_keyword = first_field
      }
    }
  } else {
    $0 = ""
    first_field = ""
    if (previous_keyword != "" && flag != "0") {
      printf ("; \n\n")
      flag = "0"
    }
    previous_keyword = ""
  }
}
#
# Print out any accumulated COMPND, SOURCE, TITLE or CAVEAT information
#
# compnd[1 .. compnd_flag-1] holds the collected title lines; they are
# written as one STRUCT loop as soon as a record of any other type is
# seen, and compnd_flag is reset to 1.
#
{
  if (compnd_flag != "1" \
      && first_field != "COMPND" \
      && first_field != "TITLE" \
      && first_field != "CAVEAT" \
      && first_field != "SOURCE") {
    printf ("\n\n")
    printf ("##################\n")
    printf ("# #\n")
    printf ("# STRUCT #\n")
    printf ("# #\n")
    printf ("##################\n\n")
    printf ("loop_\n_struct.entry_id\n_struct.title\n")
    printf (" %s\n",head_PDB_code)
    printf ("; %s\n", compnd[1])
    for (i=2; i < compnd_flag; ++i) {
      printf (" %s\n", compnd[i])
    }
    printf ("; \n")
    compnd_flag = 1
  }
}
#=========================================================================
# Keyword ATOM or HETATM or TER
#
# atom pdb type       [ 1- 6] = _atom_site.group_PDB
# atom serial number  [ 7-11] = _atom_site.id
# atom type           [13-14] = _atom_site.type_symbol
#                               (first 2 characters of atom name)
# atom name           [13-16] = _atom_site.label_atom_id
# alternate location  [17]    = _atom_site.label_alt_id
# residue name        [18-20] = _atom_site.label_comp_id
# chain identifier    [22]    = _atom_site.label_asym_id
# residue seq no.     [23-26] = _atom_site.auth_seq_id
# insertion code      [27]    = appended to residue sequence no.
# x-coordinate        [31-38] = _atom_site.cartn_x
# y-coordinate        [39-46] = _atom_site.cartn_y
# z-coordinate        [47-54] = _atom_site.cartn_z
# occupancy           [55-60] = _atom_site.occupancy
# temperature factor  [61-66] = _atom_site.B_iso_or_equiv
# footnote number     [68-70] = _atom_site.footnote_id
#                               (February 1992 PDB format)
# segment identifier  [73-76] = _atom_site.auth_asym_id
#                               (February 1996 PDB format)
# element symbol      [77-78] = _atom_site.type_symbol
#                               (February 1996 PDB format)
# charge on atom      [79-80] = append to _atom_site.type_symbol
#                               (February 1996 PDB format)
#
# Information on non_standard monomers and non-polymers derived from
# HET and FORMUL records is presented here using additional information
# derived from ATOM and HETATM records.
# The assignment of non-standard monomers versus non-polymers
# is tricky and unlikely to be correct for all entries.  Assignment is
# based on the following rules:
# i) If the HET has a chain id then it must be non-standard (this
#    is not complete since single chains do not have a chain id
#    assigned.
# ii)If FORMUL places assigns a HET to a component number among
#    the SEQRES components, the HET must be non-standard
#
#
# Rule for ATOM, HETATM and TER records.  Each record is stored at
# index atom_flag in a set of parallel arrays (atom_number, atom_name,
# residue_name, atom_type, chain_id, coordinates, ...); nothing is
# printed here.  The rule also
#   - derives a 2-character element symbol from the atom name and
#     checks it against standard_res_elements (ATOM) or the het-group
#     formula / periodic_table (HETATM, TER), queueing a warning in
#     warning_list when it has to be changed,
#   - counts residues by name in res_count, registering unknown ones,
#   - appends newly seen chains / het groups to the linked lists
#     ent_poly_point / ent_non_poly_point.
# xhet_formula, het_conv and entity_seq_num for het groups are not set
# in this rule -- presumably filled by the HET/FORMUL handling elsewhere.
#
{
  if (first_field == "ATOM" || first_field == "HETATM" \
      || first_field == "TER") {
    xlat_flag=xlat_save
    # parse field and save ATOM/HETATM/TER info
    # Since atoms are not necessarily numbered consecutively maintain
    # a complete consecutive list 1 -> atom_flag and a partial
    # list for use by CONECT which references the atom_number
    #
    atom_pdb = substr( ($0), 7, 5)
    atom_number[atom_flag] = substr( ($0), 7, 5)
    atom_point[atom_number[atom_flag]] = atom_flag
    atom_name[atom_flag] = substr( ($0), 13, 4)
    {#
      #
      # fix up atom_name by squeezing out blanks in the middle
      #
      temp_a_name = atom_name[atom_flag]
      if (substr( temp_a_name, 3, 1) == " ") {
        temp_a_name = (substr( temp_a_name, 1, 2) \
                       substr( temp_a_name, 4, 1) " " )
      }
      if (substr( temp_a_name, 2, 1) == " ") {
        temp_a_name = (" " substr( temp_a_name, 1, 1) \
                       substr( temp_a_name, 3, 2) )
      }
      if (temp_a_name == " ") temp_a_name = " . "
      ret_val=temp_a_name
      temp_name=ret_val}
    atom_name[atom_flag] = temp_name
    residue_name[atom_flag] = substr( ($0), 18, 3)
    # candidate element symbol: first two characters of the atom
    # name, upper-cased
    temp_name = substr( temp_name, 1, 2)
    {
      lx_tu = length(temp_name)
      tx_tu = temp_name
      temp_name = ""
      for (ix_tu = 1; ix_tu <= lx_tu; ++ix_tu) {
        cx_tu = substr(tx_tu,ix_tu,1)
        cx_tu = substr((UCAZ cx_tu), \
                       index((lcaz cx_tu), cx_tu),1)
        temp_name = (temp_name cx_tu)
      }
    }
    # a non-letter in the first position is replaced by a blank
    if ( index(UCAZ, substr( temp_name, 1, 1)) == 0 ) {
      temp_name = ( " " substr( temp_name, 2, 1) )
    }
    # xtemp_name: the symbol as found; ytemp_name: its 2nd character
    xtemp_name = substr( temp_name, 1, 2)
    ytemp_name = substr( temp_name, 2, 1)
    if (substr (xtemp_name, 1, 1) == " ") \
      xtemp_name = ytemp_name
    found = "false"
    if (first_field == "ATOM") {
      ++atom_res_flag
      # a residue name not yet in res_count is registered as an
      # "other" residue and reported once
      if (res_count[residue_name[atom_flag]] == "") {
        res_count[residue_name[atom_flag]] = 0
        res_formul[residue_name[atom_flag]] = "."
        res_name[residue_name[atom_flag]] = "."
        ++num_OTRESES
        res_id[num_AARESES+num_NARESES+num_OTRESES] = \
          residue_name[atom_flag]
        warning_list[++warning_flag] = \
          ("#=# ATOM_SITE: Residue name " \
           residue_name[atom_flag]" not in standard residue list \n")
      }
      ++res_count[residue_name[atom_flag]]
      for (ii = 1; ii <= num_aa_na_el && found == "false"; ++ii) {
        if (xtemp_name == standard_res_elements[ii]) found = "true"
      }
    } else {
      # HETATM/TER: accept the symbol if the het group's formula
      # contains it, else try the one-letter form, else the
      # periodic table
      if (index(xhet_formula[residue_name[atom_flag]],xtemp_name) >0){
        found = "true"
      } else {
        if (index(xhet_formula[residue_name[atom_flag]], \
                  (" " ytemp_name " ")) >0){
          found = "true"
          # het_conv remembers conversions already reported
          if ( het_conv[(residue_name[atom_flag] \
                         "|" xtemp_name "|" ytemp_name)] == ""){
            warning_list[++warning_flag] = \
              ("#=# ATOM_SITE: Het group " \
               residue_name[atom_flag]"; atom type " xtemp_name \
               " converted to " ytemp_name "\n")
            het_conv[(residue_name[atom_flag] \
                      "|" xtemp_name "|" ytemp_name)] = "done"
          }
          xtemp_name = ytemp_name
          temp_name = ytemp_name
        } else {
          for (ii = 1; ii <= num_per_tab && found == "false"; ++ii) {
            if (xtemp_name == periodic_table[ii]) found = "true"
          }
        }
      }
    }
    # still unknown: fall back to the second character alone, or to
    # "." when that is not an accepted one-letter element either
    if (found == "false") {
      temp_name = " ."
      if (first_field == "ATOM") {
        for (ii = 1; ii <= num_aa_na_el && found == "false"; ++ii) {
          if (ytemp_name == standard_res_elements[ii]) found = "true"
        }
      } else {
        for (ii = 1; ii <= num_one_let_el && found == "false"; ++ii) {
          if (ytemp_name == one_letter_elements[ii]) found = "true"
        }
      }
      if ( found == "true") {
        temp_name = ( " " ytemp_name )
      }
      warning_list[++warning_flag] = \
        ("#=# ATOM_SITE: Site " atom_pdb"; unexpected atom type " xtemp_name \
         " converted to " temp_name "\n")
    }
    atom_type[atom_flag] = substr( temp_name, 1, 2)
    atom_alt_location[atom_flag] = substr( ($0), 17, 1)
    chain_id[atom_flag] = substr( ($0), 22, 1)
    # 5 columns: sequence number plus insertion code
    residue_seq_number[atom_flag] = substr( ($0), 23, 5)
    atom_x[atom_flag] = substr( ($0), 31, 8)
    atom_y[atom_flag] = substr( ($0), 39, 8)
    atom_z[atom_flag] = substr( ($0), 47, 8)
    atom_occ[atom_flag] = substr( ($0), 55, 6)
    B_or_U[atom_flag] = substr( ($0), 61, 6)
    footnote_number[atom_flag] = substr( ($0), 68, 3)
    # at compliance level 2.0 and above the segment id and the
    # element+charge columns replace the symbol derived above
    if (compliance_level >= 2.0) {
      atom_seg_id[atom_flag] = substr( ($0), 73, 4)
      atom_type[atom_flag] = substr( ($0), 77, 4)
    }
    {#
      #
      # fix up atom_type (atom symbol and charge)
      #
      temp_a_type = ( atom_type[atom_flag] " ")
      orig_charge = substr( temp_a_type, 3, 2)
      if (orig_charge != " ") {
        if (substr( temp_a_type, 3, 1) == " ") {
          temp_a_type = (substr( temp_a_type, 1, 2) \
                         substr( temp_a_type, 4, 1) " " )
          orig_charge = substr( temp_a_type, 3, 2)
        }
        # translate through the charge[] table set up in BEGIN
        temp_charge = charge[ orig_charge ]
        if ( temp_charge != "") \
          temp_a_type = (substr( temp_a_type, 1, 2) temp_charge )
      }
      if (substr( temp_a_type, 2, 1) == " ") {
        temp_a_type = (" " substr( temp_a_type, 1, 1) \
                       substr( temp_a_type, 3, 2) )
      }
      if (temp_a_type == " ") temp_a_type = " . "
      if (substr( (temp_a_type " "), 3, 2) == " ") {
        temp_a_type = substr(temp_a_type, 1, 2)
      }
      ret_val=temp_a_type
      temp_type=ret_val}
    atom_type[atom_flag] = temp_type
    # blank numeric fields become "."
    # NOTE(review): the blank literals compared against below are a
    # single space in this copy while the fields are 3-8 columns wide
    # -- confirm the literal widths against the generated original.
    if (atom_x[atom_flag] == " " ) {
      atom_x[atom_flag] = " . "
    }
    if (atom_y[atom_flag] == " " ) {
      atom_y[atom_flag] = " . "
    }
    if (atom_z[atom_flag] == " " ) {
      atom_z[atom_flag] = " . "
    }
    if (atom_occ[atom_flag] == " " ) {
      atom_occ[atom_flag] = " . "
    }
    if (B_or_U[atom_flag] == " " ) {
      B_or_U[atom_flag] = " . "
    }
    atom_model[atom_flag] = model_flag
    #
    #
    # flag atom as ATOM or HETATM or TER
    #
    if (first_field == "ATOM") atom_or_het[atom_flag] = "ATOM"
    if (first_field == "HETATM") atom_or_het[atom_flag] = "HETATM"
    if (first_field == "TER") atom_or_het[atom_flag] = "TER"
    #
    # set alternate location value if blank
    #
    if (atom_alt_location[atom_flag] == " ") \
      atom_alt_location[atom_flag] = "."
    #
    # make a list of alternative atoms
    #
    if (atom_alt_location[atom_flag] != ".") {
      at_alt = atom_alt_location[atom_flag]
      atom_alt_list[at_alt]++
    }
    #
    # set footnote value if blank
    #
    if (footnote_number[atom_flag] == " ") \
      footnote_number[atom_flag] = " . "
    #
    # set chain_id and entity_id to bcid for ATOM records if blank
    #
    if ( (first_field == "ATOM" || first_field == "TER") \
         && chain_id[atom_flag] == " ") {
      chain_id[atom_flag] = bcid
      entity_id[atom_flag] = bcid
    }
    #
    # set chain_id to . for HETATM records if blank
    # (or to bcid when the only polymer chain seen so far is bcid)
    #
    if (first_field == "HETATM" && chain_id[atom_flag] == " ") {
      chain_id[atom_flag] = "."
      if (num_poly_ents == 1 && entities[1] == bcid) {
        chain_id[atom_flag] = bcid
      }
    }
    #
    # set entity_id to chain_id for ATOM and TER records
    #
    if (first_field == "ATOM" && chain_id[atom_flag] != " ") {
      entity_id[atom_flag] = chain_id[atom_flag]
    }
    if (first_field == "TER" && chain_id[atom_flag] != " ") {
      entity_id[atom_flag] = chain_id[atom_flag]
    }
    #
    # set _entity.id to residue_name for HETATM records
    #
    if (first_field == "HETATM") {
      entity_id[atom_flag] = residue_name[atom_flag]
      hetatm_entity = residue_name[atom_flag]
      ent_non_poly_id[hetatm_entity]++
      # first sighting: walk the list from its head (key " ") to the
      # tail and append this het group
      if (ent_non_poly_id[hetatm_entity] == 1) {
        next_non_poly_id=ent_non_poly_point[" "]
        prev_non_poly_id=" "
        while(next_non_poly_id != "") {
          prev_non_poly_id = next_non_poly_id
          next_non_poly_id = ent_non_poly_point[prev_non_poly_id]
        }
        ent_non_poly_point[prev_non_poly_id] = hetatm_entity
        ent_non_poly_point[hetatm_entity] = ""
        ++num_non_poly_ents
        ent_non_poly_num[hetatm_entity] = num_non_poly_ents
      }
      # a het group numbered among the polymer entities keeps the
      # chain id as its entity id
      if (entity_seq_num[residue_name[atom_flag]] != "" &&\
          entity_seq_num[residue_name[atom_flag]]+0 <= \
          num_poly_ents ) \
        entity_id[atom_flag] = chain_id[atom_flag]
    }
    #
    # define _entities for polypeptide chains or DNA strands
    # ie these are _entity_poly.  Done by checking for chain in chain_id
    # in ATOM records
    if (first_field == "ATOM") {
      atom_entity = chain_id[atom_flag]
      ent_poly_id[atom_entity]++
      # first sighting of this chain: append it to the polymer list
      if (ent_poly_id[atom_entity] == 1) {
        next_poly_id=ent_poly_point[" "]
        prev_poly_id=" "
        while(next_poly_id != "") {
          prev_poly_id = next_poly_id
          next_poly_id = ent_poly_point[prev_poly_id]
        }
        ent_poly_point[prev_poly_id] = atom_entity
        ent_poly_point[atom_entity] = ""
        ++num_poly_ents
        ent_poly_num[atom_entity] = num_poly_ents
        entity_seq_num[atom_entity] = num_poly_ents
        entities[num_poly_ents] = atom_entity
      }
    }
    ++atom_flag
  }
}
#=====================================================================
# Keyword ANISOU
#
#
# atom serial number   = matched via pointers to ATOM/HETATM
# atom type            = dropped, taken from ATOM/HETATM
# atom name            = dropped, taken from ATOM/HETATM
# alternate location   = dropped, taken from ATOM/HETATM
# residue name         = dropped, taken from ATOM/HETATM
# chain identifier     = dropped, taken from ATOM/HETATM
# residue sequence no. = dropped, taken from ATOM/HETATM
# insertion code       = dropped, taken from ATOM/HETATM
#
#
#
# Note the different order
#        PDB        CIF
#   1. U[1][1]    U[1][1]
#   2. U[2][2]    U[1][2]
#   3. U[3][3]    U[1][3]
#   4. U[1][2]    U[2][2]
#   5. U[1][3]    U[2][3]
#   6. U[2][3]    U[3][3]
#
# The six 7-column fields are stored, in PDB order, at index
# aniso_flag; aniso_point maps the atom serial number to that index.
#
{
  if (first_field == "ANISOU") {
    xlat_flag=xlat_save
    # parse field
    ++aniso_flag
    a_atom_serial_number[aniso_flag] = substr( ($0), 7, 5)
    aniso_point[a_atom_serial_number[aniso_flag]] = aniso_flag
    atom_U11[aniso_flag] = substr( ($0), 29, 7)
    atom_U22[aniso_flag] = substr( ($0), 36, 7)
    atom_U33[aniso_flag] = substr( ($0), 43, 7)
    atom_U12[aniso_flag] = substr( ($0), 50, 7)
    atom_U13[aniso_flag] = substr( ($0), 57, 7)
    atom_U23[aniso_flag] = substr( ($0), 64, 7)
  }
}
#====================================================================
# Keyword AUTHOR
#
# Loop over authors as "_audit_author..."
#
# AUTHOR rule: columns 11-70 are split on commas into authors[]; each
# name has its blanks normalised and, when auth_convtext asks for it,
# is turned into CIF "Family, I.N." form.  The AUDIT_AUTHOR loop header
# is printed only while author_flag is still 1.
#
{
  if (first_field == "AUTHOR") {
    xlat_flag=xlat_save
    # parse record creating an array of authors
    text = substr( ($0), 11, 60)
    cont = substr( ($0), 9, 2)
    num_auth = split(text,authors,comma )
    for (i=1; i <= num_auth; ++i){
      # rebuild the name with single blanks between its parts
      num_a_split = split(authors[i],a_split," ")
      authors[i] = ""
      if (num_a_split > 0) {
        authors[i] = a_split[1]
        for (j=2; j <= num_a_split; ++j) {
          authors[i] = (authors[i] " " a_split[j])
        }
      }
      if (auth_convtext == "yes" || \
          (auth_convtext == "conditional" && convtext == "yes")) {
        {#
          # produce a CIF-style name from a PDB name
          #
          # begin by applying typesetting codes if any
          # but always treat "-" and "'" as breaks for capitalization
          # in names
          #
          {
            lx_tl = length(authors[i])
            tx_tl = authors[i]
            lostr = ""
            for (ix_tl = 1; ix_tl <= lx_tl; ++ix_tl) {
              cx_tl = substr(tx_tl,ix_tl,1)
              cx_tl = substr((lcaz cx_tl), \
                             index((UCAZ cx_tl), cx_tl),1)
              lostr = (lostr cx_tl)
            }
          }
          lstr = length(lostr)
          mystr = ""
          pchar = " "
          for( qnsi=1; qnsi <= lstr; ++qnsi) {
            mychar = substr( lostr, qnsi, 1)
            if( pchar == " " || \
                pchar == "," || \
                pchar == "." || \
                pchar == "-" || \
                pchar == "'" || \
                pchar == "(" || \
                pchar == "*" || \
                pchar == "/" ) {
              mychar = substr((UCAZ mychar),\
                              index((lcaz mychar), mychar),1)
            }
            if( ( mychar != "*" && mychar != "$" && mychar != "/" ) \
                || (mychar == pchar) ) mystr = (mystr mychar)
            if( pchar == "/" ){
              if( mychar == "$" || mychar == "-" ) {
                pchar = mychar
              } # end if( mychar == "$" || mychar == "-" )
            } else {
              pchar = mychar
            } # end if( pchar == "/" )
          } # end for( qnsi=1; qnsi <= lstr; ++qnsi)
          #
          # See if a specific replacement was given
          # (rep_name is keyed by the upper-cased name)
          #
          {
            lx_tu = length(mystr)
            tx_tu = mystr
            name_temp = ""
            for (ix_tu = 1; ix_tu <= lx_tu; ++ix_tu) {
              cx_tu = substr(tx_tu,ix_tu,1)
              cx_tu = substr((UCAZ cx_tu), \
                             index((lcaz cx_tu), cx_tu),1)
              name_temp = (name_temp cx_tu)
            }
          }
          if (rep_name[name_temp] != "") mystr = rep_name[name_temp]
          #
          # See if there is a comma in place if so we are done
          #
          if (index(mystr,comma) != 0 ){
            ret_val=mystr
          } else {
            nam_suf = ""
            num_namp = split(mystr,x_namep," ")
            if (num_namp > 1) {
              # a trailing Junior/Senior/numeral either stays with
              # the family name or is moved to the very end
              {
                lx_tu = length(x_namep[num_namp])
                tx_tu = x_namep[num_namp]
                xtemp = ""
                for (ix_tu = 1; ix_tu <= lx_tu; ++ix_tu) {
                  cx_tu = substr(tx_tu,ix_tu,1)
                  cx_tu = substr((UCAZ cx_tu), \
                                 index((lcaz cx_tu), cx_tu),1)
                  xtemp = (xtemp cx_tu)
                }
              }
              if (rep_suffix[xtemp] != ""){
                if(junior_on_last == "yes") {
                  x_namep[num_namp] = rep_suffix[xtemp]
                } else {
                  nam_suf = (" " rep_suffix[xtemp] )
                  --num_namp
                }
              }
              mystr = x_namep[1]
              for (knamp = 2; knamp <= num_namp; ++knamp) {
                mystr = (mystr " " x_namep[knamp])
              }
            } # end if (num_namp > 1)
            # scan backwards for the last "." (end of the initials)
            # and swap: "I.N.Family" -> "Family, I.N."
            llname = length(mystr)
            cc = ""
            for (kc=llname-1; kc>1; --kc) {
              cp = cc
              cc = substr(mystr,kc,1)
              if (cc == ".") {
                if (cp != " ") {
                  mystr = (substr(mystr,kc+1,llname-kc) comma " " \
                           substr(mystr,1,kc))
                } else {
                  mystr = (substr(mystr,kc+2,llname-kc-1) comma " " \
                           substr(mystr,1,kc))
                } # if (cp != " ")
                kc=0
              } # end if (cc == ".")
            } # for (kc=llname-1; kc>1; --kc)
            mystr = (mystr nam_suf)
            ret_val=mystr
          } # end if (index(mystr,comma) != 0 )
          authors[i]=ret_val}
      }
    }
    # a trailing comma leaves an empty last entry; drop it
    is_blank = authors[num_auth]
    if (is_blank == "") --num_auth
    if (num_auth >= 1 && author_flag == "1") {
      printf ("\n\n\n")
      printf ("####################\n")
      printf ("# #\n")
      printf ("# AUDIT_AUTHOR #\n")
      printf ("# #\n")
      printf ("####################\n\n\n")
      printf ("loop_ \n")
      printf ("_audit_author.name \n" )
    }
    for (i=1; i <= num_auth; ++i) {
      printf ("'%s' \n", authors[i])
    }
    if (num_auth > 0) ++author_flag
  }
}
#===========================================================================
# Keyword CAVEAT
#
# In the 1995 format, a new record, CAVEAT, was added to warn of severe
# errors in an entry.
#
# caveat_cont  [9-10]
# caveat_id    [12-15]
# caveat_text  [20-70] = _struct.title
#
# The text is appended to compnd[], the list later printed as
# _struct.title; a first (non-continuation) record also adds a heading.
#
{
  if (first_field == "CAVEAT") {
    xlat_flag=xlat_save
    caveat_cont = substr( ($0), 9, 2)
    caveat_id = substr( ($0), 12, 4)
    caveat_text = substr( ($0), 20, 51)
    if (caveat_cont == " ") {
      compnd[compnd_flag++] = "Warning of Severe Error::"
    }
    bp = " "
    if (caveat_cont != " ") bp = " "
    compnd[compnd_flag++] = (bp caveat_text)
  }
}
#===========================================================================
# Keyword CISPEP
#
# Introduced with the February 1996 PDB format
#
# cp_sernum          [ 8-10]
# cp_res_name_beg    [12-14]
# cp_chain_id_beg    [16]
# cp_res_seq_num_beg [18-21]
# cp_icode_beg       [22]
# cp_res_name_end    [26-28] = _struct_mon_prot.label_comp_id
#                              _struct_mon_prot_cis.label_comp_id
# cp_chain_id_end    [30]    = _struct_mon_prot.label_asym_id
#                              _struct_mon_prot_cis.label_asym_id
# cp_res_seq_num_end [32-35] = _struct_mon_prot.auth_seq_id
#                              _struct_mon_prot_cis.auth_seq_id
# cp_icode_end       [36]      append to
#                              _struct_mon_prot.auth_seq_id
#                              _struct_mon_prot_cis.auth_seq_id
# cp_modnum          [44-46] = _struct_mon_prot.label_model_id
#                              _struct_mon_prot_cis.label_model_id
# cp_omega           [54-59] = _struct_mon_prot.omega
#
# Only the "end" residue, model number and omega are stored (index
# cispep_flag); blank fields become "." or, for the chain, bcid.
#
{
  if (first_field == "CISPEP") {
    xlat_flag=xlat_save
    cp_res_name_end[++cispep_flag] = substr( ($0),26, 3)
    cp_chain_id_end[cispep_flag] = substr( ($0),30, 1)
    cp_res_seq_num_end[cispep_flag] = substr( ($0),32, 5)
    cp_modnum[cispep_flag] = substr( ($0),44, 3)
    cp_omega[cispep_flag] = substr( ($0),54, 6)
    if (cp_res_name_end[cispep_flag] == " ") \
      cp_res_name_end[cispep_flag] = " . "
    if (cp_chain_id_end[cispep_flag] == " ") \
      cp_chain_id_end[cispep_flag] = bcid
    if (cp_modnum[cispep_flag] == " ") {
      cp_modnum[cispep_flag] = " . "
    }
    if (cp_omega[cispep_flag] == " ") {
      cp_omega[cispep_flag] = " . "
    }
  }
}
#==========================================================================
# keyword CRYST1
#
# Prints the _cell and _symmetry items (and, when verbose, placeholder
# _cell_measurement items) directly from the record.
#
{
  if (first_field == "CRYST1") {
    xlat_flag=xlat_save
    #
    # Contains a b c alpha beta gamma SG Z
    #
    # calculate cell volume
    #   V = a*b*c*sqrt(1 - cos^2(alpha) - cos^2(beta) - cos^2(gamma)
    #                    + 2*cos(alpha)*cos(beta)*cos(gamma))
    # All three squared cosines are subtracted; the earlier grouping
    # "1 - (ca*ca - cb*cb - cc*cc)" added two of them back and was only
    # right when all angles are 90 degrees.
    {
      ca = cos(substr( ($0),34, 7) * 0.0174532)
      cb = cos(substr( ($0),41, 7) * 0.0174532)
      cc = cos(substr( ($0),48, 7) * 0.0174532)
      cz = (1.0 - ca*ca - cb*cb - cc*cc + (2.0*ca*cb*cc))
      vol = (substr( ($0), 7, 9) *\
             substr( ($0),16, 9) *\
             substr( ($0),25, 9) * (sqrt(cz)))
      if (vol-1 < .01) {
        warning_list[++warning_flag] = \
          "#=# CELL: The volume is 1, may be model or NMR, read REMARKs\n"
        ++vol_flag
      }
    }
    # localize space group and Z
    {
      sg = substr( ($0), 56, 11)
      Z = substr( ($0), 67, 4 )
    }
    printf ("\n")
    printf ("_cell.entry_id %s\n",head_PDB_code)
    printf ("_cell.length_a %9.3f\n", substr( ($0), 7, 9))
    printf ("_cell.length_b %9.3f\n", substr( ($0),16, 9))
    printf ("_cell.length_c %9.3f\n", substr( ($0),25, 9))
    printf ("_cell.angle_alpha %7.2f\n", substr( ($0),34, 7))
    printf ("_cell.angle_beta %7.2f\n", substr( ($0),41, 7))
    printf ("_cell.angle_gamma %7.2f\n", substr( ($0),48, 7))
    printf ("_cell.volume %10.1f \n", vol)
    printf ("_cell.details ? \n")
    printf ("_cell.Z_PDB %3d \n\n", Z)
    printf ("_symmetry.entry_id %s \n", head_PDB_code)
    printf ("_symmetry.space_group_name_H-M '%11s' \n\n", sg)
    if (verbose == "yes") {
      printf ("_cell_measurement.temp ? \n")
      printf ("_cell_measurement.theta_min ? \n")
      printf ("_cell_measurement.theta_max ? \n")
      printf ("_cell_measurement.wavelength ? \n")
      printf ("_cell_measurement.pressure ? \n")
      printf ("_cell_measurement.radiation ? \n")
      printf ("_cell_measurement.reflns_used ? \n\n")
      printf ("loop_\n")
      printf ("_cell_measurement_refln.index_h \n")
      printf ("_cell_measurement_refln.index_k \n")
      printf ("_cell_measurement_refln.index_l \n")
      printf ("_cell_measurement_refln.theta \n")
      printf (" ? ? ? ? \n")
    }
  }
}
#======================================================================
# Keyword COMPND
#
# This is considered a common name for the macromolecule
# in the 1992 format, and a more detailed description with
# keywords in the 1995 format. In either case the entire
# COMPND record is added to the information used for
# _struct.title along with the information from TITLE,
# SOURCE and CAVEAT
#
# record name        [ 1 - 6] = "COMPND"
# continuation flag  [ 9 - 10] = blank for first record
# compound           [11 - 70] = _struct.title
#
#
{
  if (first_field == "COMPND") {
    xlat_flag=xlat_save
    compnd_contin = substr( ($0), 9, 2)
    if (compnd_contin == " ") \
      compnd[compnd_flag++] = "Compound::"
    bp = " "
    if (compnd_contin != " ") bp = " "
    compnd[compnd_flag] = (bp substr( ($0), 11, 60))
    # typeset information, if requested
    if (convtext == "yes") \
    {#
      # apply PDB typsetting codes if any to a line
      #
      {
        lx_tl = length(compnd[compnd_flag] )
        tx_tl = compnd[compnd_flag]
        lostr = ""
        for (ix_tl = 1; ix_tl <= lx_tl; ++ix_tl) {
          cx_tl = substr(tx_tl,ix_tl,1)
          cx_tl = substr((lcaz cx_tl), \
                         index((UCAZ cx_tl), cx_tl),1)
          lostr = (lostr cx_tl)
        }
      }
      lstr = length(lostr)
      mystr = ""
      pchar = " "
      for( qtsi=1; qtsi <= lstr; ++qtsi) {
        mychar = substr( lostr, qtsi, 1)
        if( pchar == " " || \
            pchar == "," || \
            pchar == "."
|| \ pchar == "(" || \ pchar == "*" || \ pchar == "/" ) { mychar = substr((UCAZ mychar),\ index((lcaz mychar), mychar),1) } if( ( mychar != "*" && mychar != "$" && mychar != "/" ) \ || (mychar == pchar) ) mystr = (mystr mychar) if( pchar == "/" ){ if( mychar == "$" || mychar == "-" ) { pchar = mychar } } else pchar = mychar } ret_val=mystr compnd[compnd_flag]=ret_val} ++compnd_flag } } #====================================================================== # Keyword CONECT # # Origin serial number = _struct_conn.ptnr1_label_comp_id # = _struct_conn.ptnr1_label_asym_id # = _struct_conn.ptnr1_auth_seq_id # = _struct_conn.ptnr1_label_atom_id # = _struct_conn.ptnr1_label_alt_id # Target serial numbers = _struct_conn.ptnr2_label_comp_id # = _struct_conn.ptnr2_label_asym_id # = _struct_conn.ptnr2_auth_seq_id # = _struct_conn.ptnr2_label_atom_id # = _struct_conn.ptnr2_label_alt_id # Hydrogen bond donor = _struct_conn.conn_type_id # Hydrogen bond acceptor = _struct_conn.conn_type_id # Salt bridge excess -ve = _struct_conn.conn_type_id # Salt bridge excess +ve = _struct_conn.conn_type_id # # _struct_conn.id = incremental number assigned to each # CONECT record # _struct_conn.conn_type_id = matches generic _struct_conn_type.criteria # # all atoms at 1_555 ie no support for -ve targets # No special details included # { if (first_field == "CONECT") { xlat_flag=xlat_save connect_save[++connect_flag] = substr( ($0), 1, 61) ++conect_flag_2 } } #=========================================================================== # Keyword DBREF # # In the 1995 format, a new record, DBREF, was added to provide # "cross-reference links between PDB and the corresponding sequence # database entries." The citations may be to subchains specified # by PDB sequence number and insertion code ranges. 
#
#   DBREF              [1- 5]
#   dbref_idcode       [8-11]  = idcode of this entry
#   dbref_chainID      [ 13 ]  = _struct_asym.id
#                              = _struct_ref.biol_id
#   dbref_seqBegin     [15-19] = combines seqBegin and insertBegin
#                                used to obtain start point in _entity_poly_seq.num
#                                then mapped to _struct_ref_seq.seq_align_beg
#   dbref_seqEnd       [21-25] = combines seqEnd and insertEnd
#                                used to obtain end point in _entity_poly_seq.num
#                                then mapped to _struct_ref_seq.seq_align_end
#   dbref_database     [27-32] = _struct_ref.db_name
#   dbref_dbAccession
#                      [34-41] = _struct_ref.db_code
#   dbref_dbIdCode     [43-54] = add to _struct_ref.db_code
#   dbref_dbseqBeg     [56-61] = _struct_ref_seq.db_align_beg
#   dbref_dbseqEnd     [63-68] = _struct_ref_seq.db_align_end
#
# Note: as of this writing, _struct_ref_seq_dif.db_seq_num is
# not in the mmCIF dictionary.
#
# if the database is PDB, columns 61 and 68 contain an insertion code
# for other databases, these columns are blank

first_field == "DBREF" {
  xlat_flag = xlat_save

  dbref_chainID[++dbref_flag] = substr(($0), 13, 1)
  # Width-independent blank test: also catches a right-trimmed record.
  if (dbref_chainID[dbref_flag] ~ /^ *$/) dbref_chainID[dbref_flag] = bcid

  dbref_seqBegin[dbref_flag]    = substr(($0), 15,  5)
  dbref_seqEnd[dbref_flag]      = substr(($0), 21,  5)
  dbref_database[dbref_flag]    = substr(($0), 27,  6)
  dbref_dbAccession[dbref_flag] = substr(($0), 34,  8)
  dbref_dbIdCode[dbref_flag]    = substr(($0), 43, 12)
  dbref_dbseqBeg[dbref_flag]    = substr(($0), 56,  6)
  dbref_dbseqEnd[dbref_flag]    = substr(($0), 63,  6)

  # Database name: blank-separated words joined with "_"; "." if empty.
  numx = split(dbref_database[dbref_flag], dblist, " ")
  dbref_joined = "."
  if (numx > 0) {
    dbref_joined = dblist[1]
    for (j = 2; j <= numx; ++j) dbref_joined = (dbref_joined "_" dblist[j])
  }
  dbref_database[dbref_flag] = dbref_joined

  # Accession code: same treatment.
  numx = split(dbref_dbAccession[dbref_flag], dblist, " ")
  dbref_joined = "."
  if (numx > 0) {
    dbref_joined = dblist[1]
    for (j = 2; j <= numx; ++j) dbref_joined = (dbref_joined "_" dblist[j])
  }
  dbref_dbAccession[dbref_flag] = dbref_joined

  # Id code: empty when blank, otherwise joined and given a leading blank.
  numx = split(dbref_dbIdCode[dbref_flag], dblist, " ")
  dbref_joined = ""
  if (numx > 0) {
    dbref_joined = (" " dblist[1])
    for (j = 2; j <= numx; ++j) dbref_joined = (dbref_joined "_" dblist[j])
  }
  dbref_dbIdCode[dbref_flag] = dbref_joined

  # An id code that merely repeats the accession code is dropped.
  if ((" " dbref_dbAccession[dbref_flag]) == dbref_dbIdCode[dbref_flag]) {
    dbref_dbIdCode[dbref_flag] = ""
  }
}

#==========================================================================
# keyword END
#
# terminates processing of records, but remainder of file is read
#
#

first_field == "END" {
  xlat_flag = xlat_save
  ++end_flag
}

#=============================================================================
# Keyword ENDMDL

first_field == "ENDMDL" {
  xlat_flag = xlat_save
  # Reset the current model id to the "." placeholder.
  model_flag = "."
}

#====================================================================
# Keyword EXPDTA
#
#   expdta   [11-70]  = _exptl.method
#

first_field == "EXPDTA" {
  xlat_flag = xlat_save

  # parse field
  expdta = substr(($0), 11, 60)
  nmr_flag = index(expdta, "NMR")

  # Fold to lower case.  index() yields 1..26 for an upper-case letter and
  # 27 (the appended character itself) for anything else.
  tx_tl = expdta
  lx_tl = length(tx_tl)
  loexpdta = ""
  for (ix_tl = 1; ix_tl <= lx_tl; ++ix_tl) {
    cx_tl = substr(tx_tl, ix_tl, 1)
    cx_tl = substr((lcaz cx_tl), index((UCAZ cx_tl), cx_tl), 1)
    loexpdta = (loexpdta cx_tl)
  }

  # Collapse runs of blanks and trim both ends.
  num_expdta = split(loexpdta, exp_split, " ")
  loexpdta = ""
  if (num_expdta > 0) {
    loexpdta = exp_split[1]
    for (j = 2; j <= num_expdta; ++j) {
      loexpdta = (loexpdta " " exp_split[j])
    }
  }

  # Only two method strings are accepted silently; anything else is
  # reported in the warning list.
  expwarn = "true"
  if (loexpdta == "x-ray diffraction") {
    loexpdta = "single-crystal x-ray diffraction"
    expwarn = "false"
  } else if (loexpdta == "theoretical model") {
    expwarn = "false"
  }
  if (expwarn == "true") {
    warning_list[++warning_flag] = \
      ("#=# EXPTL: Non-enumerated method: " loexpdta "\n")
  }

  printf ("_exptl.entry_id %s\n", head_PDB_code)
  printf ("_exptl.method '%-s'\n", loexpdta)
}
#======================================================================
# Keyword FORMUL - chemical formula of non-standard groups
#
#   component number  == _entity.id & _chem_comp.entity_id
#   het identifier    == _entity_name_common & _chem_comp.id
#   het_formula_mw    == ignored
#   het_formula_text  == _chem_comp.formula
#   ??                == _entity_special_details
#
# Information written in ATOM/HETATM keyword

first_field == "FORMUL" {
  xlat_flag = xlat_save

  # parse field
  formul_het_number[formul_flag]      = substr(($0),  9,  2)+0
  formul_het_site_symbol[formul_flag] = substr(($0), 13,  3)
  formul_het_cont_flag[formul_flag]   = substr(($0), 17,  2)
  formul_het_text[formul_flag]        = substr(($0), 20, 51)

  hetatm_entity = formul_het_site_symbol[formul_flag]
  entity_seq_num[hetatm_entity] = formul_het_number[formul_flag]+0

  # A blank continuation field starts a new ";" text block; any other
  # value appends a line.  The test is width-independent so that a
  # right-trimmed record is still seen as the first line.
  if (formul_het_cont_flag[formul_flag] ~ /^ *$/) {
    het_formula[hetatm_entity] = ("\n; " formul_het_text[formul_flag])
  } else {
    het_formula[hetatm_entity] = (het_formula[hetatm_entity] \
      "\n " formul_het_text[formul_flag])
  }

  # xhet_formula keeps only the upper-case letters of the formula text:
  # every other character becomes a blank and runs of blanks are squeezed.
  pxxc = ""
  for (ii = 0; ii <= 50; ++ii) {
    xxc = substr(($0), 20+ii, 1)
    if (index(UCAZ, xxc) == 0) xxc = " "
    if (pxxc != " " || xxc != " ") {
      xhet_formula[hetatm_entity] = (xhet_formula[hetatm_entity] xxc)
    }
    pxxc = xxc
  }
  xhet_formula[hetatm_entity] = (xhet_formula[hetatm_entity] " ")

  # First FORMUL seen for this het group: append it to the chain kept in
  # ent_non_poly_point[], whose head is stored under the key " " and whose
  # last element points to "".
  ent_non_poly_id[hetatm_entity]++
  if (ent_non_poly_id[hetatm_entity] == 1) {
    prev_non_poly_id = " "
    next_non_poly_id = ent_non_poly_point[" "]
    while (next_non_poly_id != "") {
      prev_non_poly_id = next_non_poly_id
      next_non_poly_id = ent_non_poly_point[prev_non_poly_id]
    }
    ent_non_poly_point[prev_non_poly_id] = hetatm_entity
    ent_non_poly_point[hetatm_entity] = ""
    ++num_non_poly_ents
    ent_non_poly_num[hetatm_entity] = formul_het_number[formul_flag]
  }
  ++formul_flag

  # Set up to read additional entities from ATOM records
  (entity_flag = formul_flag - 1)
}

#=========================================================================
# keyword FTNOTE -- footnote to atoms
# or residues
#
#   footnote number == _atom_sites_footnote.id
#   footnote text   == _atom_sites_footnote.text

first_field == "FTNOTE" {
  xlat_flag = xlat_save

  x = substr (($0), 10, 1)

  # The loop header is queued for as long as footnote 1 is still marked
  # "0" in ftnote_flag[] (that array is initialised outside this rule).
  if (ftnote_flag[1] == "0") {
    ft_save[++foot_flag] = "\nloop_\n"
    ft_save[++foot_flag] = "_atom_sites_footnote.id \n"
    ft_save[++foot_flag] = "_atom_sites_footnote.text \n"
  }

  # NOTE(review): only column 10 is read, so the footnote number is a
  # single character -- confirm whether numbers above 9 can occur.
  ftnote_num  = substr (($0), 10, 1)
  ftnote_text = substr (($0), 12, 59)

  # A higher number than on the previous record closes the open text block.
  if (ftnote_num > ftnote_flag_old) {
    ft_save[++foot_flag] = "; \n"
  }

  # First line of a footnote: emit its number and open a ";" text block.
  # Later lines are appended with a one-blank indent.
  if (ftnote_flag[ftnote_num] == "0") {
    ft_save[++foot_flag] = (" " ftnote_num "\n")
    ft_save[++foot_flag] = ("; " ftnote_text "\n")
  } else {
    ft_save[++foot_flag] = (" " ftnote_text "\n")
  }
  ++ftnote_flag[ftnote_num]

  ftnote_flag_old = ftnote_num
}

#====================================================================
# Keyword HEADER
#
# This is a good place to place the _struct_biol data items. Templates
# are given but no information has been parsed excluding
# _special_details.
#
#   head_funct_class  [11-50] == _struct_biol.details
#   head_dep_date     [51-59] == _database_PDB_rev.date_original
#                                _audit.creation_date
#   head_PDB_code     [63-66] == _database_2.database_code
#                                _struct_biol.id
#                                _audit.revision_id

first_field == "HEADER" {
  xlat_flag = xlat_save

  head_funct_class = substr (($0), 11, 40)
  head_dep_date    = substr (($0), 51,  9)
  head_PDB_code    = substr (($0), 63,  4)

  # A blank id code, or one missing because the record is short, becomes
  # the "." placeholder.
  if (head_PDB_code ~ /^ *$/) {
    head_PDB_code = "."
  }
  #
  # Output the PDB code immediately as the data block name
  #
  printf("data_%4s\n\n", head_PDB_code)
  printf("_entry.id %4s\n\n", head_PDB_code)
  #
  # save the header id as a possible audit.revision_id
  #
  aud_rev_id = head_PDB_code
}

#=======================================================================
# Keyword HELIX
#
#    8 - 10  helix_no.
#   (helix_no., cols 8 - 10)       == (not used)
#   12 - 14  helix_id              == _struct_conf.id
#   16 - 18  helix_res_name_beg    == _struct_conf.beg_label_comp_id
#   20       helix_chain_id_beg    == _struct_conf.beg_label_asym_id
#   22 - 26  helix_res_seq_beg     == _struct_conf.beg_auth_seq_id
#   28 - 30  helix_res_name_end    == _struct_conf.end_label_comp_id
#   32       helix_chain_id_end    == _struct_conf.end_label_asym_id
#   34 - 38  helix_res_seq_end     == _struct_conf.end_auth_seq_id
#   39 - 40  helix_class           == _struct_conf.conf_type_id
#   41 - 70  helix_comment         == _struct_conf.details
#
# note helix classes 9 and 10 as defined by the PDB do not have CIF
# definitions
#
#

first_field == "HELIX" {
  xlat_flag = xlat_save

  helix_no[ss_flag]           = substr(($0),  8,  3)
  helix_id[ss_flag]           = substr(($0), 12,  3)
  helix_res_name_beg[ss_flag] = substr(($0), 16,  3)
  helix_chain_id_beg[ss_flag] = substr(($0), 20,  1)
  helix_res_seq_beg[ss_flag]  = substr(($0), 22,  5)
  helix_res_name_end[ss_flag] = substr(($0), 28,  3)
  helix_chain_id_end[ss_flag] = substr(($0), 32,  1)
  helix_res_seq_end[ss_flag]  = substr(($0), 34,  5)
  helix_class[ss_flag]        = substr(($0), 39,  2)
  helix_comment[ss_flag]      = substr(($0), 41, 30)

  # The class is taken numerically once, so the default comment and the
  # class name below are chosen by the same test.
  hx_class_num = helix_class[ss_flag] + 0

  # An empty comment is replaced by a description of the class, or by a
  # "." placeholder when the class is not one of 1..10.
  if (helix_comment[ss_flag] ~ /^ *$/) {
    helix_comment[ss_flag] = " . "
    if (hx_class_num == 1)       helix_comment[ss_flag] = "RIGHT-HANDED ALPHA HELIX"
    else if (hx_class_num == 2)  helix_comment[ss_flag] = "RIGHT-HANDED OMEGA HELIX"
    else if (hx_class_num == 3)  helix_comment[ss_flag] = "RIGHT-HANDED PI HELIX"
    else if (hx_class_num == 4)  helix_comment[ss_flag] = "RIGHT-HANDED GAMMA HELIX"
    else if (hx_class_num == 5)  helix_comment[ss_flag] = "RIGHT-HANDED 3/10 HELIX"
    else if (hx_class_num == 6)  helix_comment[ss_flag] = "LEFT-HANDED ALPHA HELIX"
    else if (hx_class_num == 7)  helix_comment[ss_flag] = "LEFT-HANDED OMEGA HELIX"
    else if (hx_class_num == 8)  helix_comment[ss_flag] = "LEFT-HANDED GAMMA HELIX"
    else if (hx_class_num == 9)  helix_comment[ss_flag] = "2/7 RIBBON/HELIX"
    else if (hx_class_num == 10) helix_comment[ss_flag] = "POLYPROLINE"
  }

  # strip blanks from id
  num_x = split(helix_id[ss_flag], xxx, " ")
  helix_id[ss_flag] = ""
  if (num_x == 1) helix_id[ss_flag] = xxx[1]
  if (num_x == 2) helix_id[ss_flag] = (xxx[1] "_" xxx[2])

  # provide default conditions
  if (helix_chain_id_beg[ss_flag] ~ /^ *$/) helix_chain_id_beg[ss_flag] = bcid
  if (helix_chain_id_end[ss_flag] ~ /^ *$/) helix_chain_id_end[ss_flag] = bcid

  # give real names to helix classes; the suffix is "_N" when either end
  # residue appears in na_list[], otherwise "_P"
  h_class_suffix = "_P"
  num_x = split((helix_res_name_end[ss_flag] " " \
                 helix_res_name_beg[ss_flag]), xxx, " ")
  for (i in na_list) {
    if (na_list[i] == xxx[1] || na_list[i] == xxx[2]) h_class_suffix = "_N"
  }
  if (hx_class_num == 1)       helix_class[ss_flag] = ("HELX_RH_AL" h_class_suffix)
  else if (hx_class_num == 2)  helix_class[ss_flag] = ("HELX_RH_OM" h_class_suffix)
  else if (hx_class_num == 3)  helix_class[ss_flag] = ("HELX_RH_PI" h_class_suffix)
  else if (hx_class_num == 4)  helix_class[ss_flag] = ("HELX_RH_GA" h_class_suffix)
  else if (hx_class_num == 5)  helix_class[ss_flag] = ("HELX_RH_3T" h_class_suffix)
  else if (hx_class_num == 6)  helix_class[ss_flag] = ("HELX_LH_AL" h_class_suffix)
  else if (hx_class_num == 7)  helix_class[ss_flag] = ("HELX_LH_OM" h_class_suffix)
  else if (hx_class_num == 8)  helix_class[ss_flag] = ("HELX_LH_GA" h_class_suffix)
  else if (hx_class_num == 9)  helix_class[ss_flag] = ("HELX_27" h_class_suffix)
  else if (hx_class_num == 10) helix_class[ss_flag] = ("HELX_PP" h_class_suffix)

  ++ss_flag
  ++helix_flag
}

#===================================================================
# Keyword HET
#
#   het_site_symbol   [ 8 - 10] == to link to _entity.id from FORMUL
#   het_site_chain    [13]      == ????
#   het_site_seqNum   [14 - 17] == sequence no, or -999 if more than 15
#   het_site_iCode    [18]      == append to seq no
#   het_atoms_number  [21 - 25] == THIS IS THE NUMBER OF HETATM LINES
#                                  NOT A COUNT OF ATOMS
#                                  By careful processing of the HETATM
#                                  information, paying attention to
#                                  occupancies, this number could
#                                  be related to
#                                  _chem_comp.number_atoms_all or
#                                  _chem_comp.number_atoms_nh
#                                  but we do not attempt this
#   het_site_text     [31-70]   == _chem_comp.details
#
#

first_field == "HET" {
  xlat_flag = xlat_save

  # parse field
  het_site_symbol[het_flag]  = substr(($0),  8, 3)
  het_site_chain[het_flag]   = substr(($0), 13, 1)
  het_site_residue[het_flag] = substr(($0), 14, 5)
  # All five columns 21-25 are read: the count is right-justified, so a
  # four-column read would lose its last digit.
  het_atoms_number[het_flag] = substr(($0), 21, 5)
  het_site_text[het_site_symbol[het_flag]] = substr(($0), 31, 40)

  if (het_site_chain[het_flag] ~ /^ *$/) {
    het_site_chain[het_flag] = "."
  }
  ++het_flag
}

#===================================================================
# Keyword HETNAM
#
#   hetnam_cont    [ 9 - 10] == continuation flag
#   hetnam_symbol  [12 - 14] == to link to entity_id from FORMUL
#   hetnam_text    [16 - 70] == text of chemical name
#                               _chem_comp.name and
#                               _entity_name_com.name
#

first_field == "HETNAM" {
  xlat_flag = xlat_save

  # parse field
  hetnam_cont[hetnam_flag]   = substr(($0),  9,  2)
  hetnam_symbol[hetnam_flag] = substr(($0), 12,  3)
  hetnam_text[hetnam_flag]   = substr(($0), 16, 55)

  # First line starts the name; continuation lines are appended on a new
  # line.  Both are indented by one blank.
  if (hetnam_cont[hetnam_flag] ~ /^ *$/) {
    het_site_name[hetnam_symbol[hetnam_flag]] = \
      (" " hetnam_text[hetnam_flag])
  } else {
    het_site_name[hetnam_symbol[hetnam_flag]] = \
      (het_site_name[hetnam_symbol[hetnam_flag]] \
       "\n " hetnam_text[hetnam_flag])
  }
  ++hetnam_flag
}

#===================================================================
# Keyword HETSYN
#
#   hetsyn_cont    [ 9 - 10] == continuation flag
#   hetsyn_symbol  [12 - 14] == to link to entity_id from FORMUL
#   hetsyn_text    [16 - 70] == text of chemical name
#                               _entity_name_com.name
#

first_field == "HETSYN" {
  xlat_flag = xlat_save

  # parse field
  hetsyn_cont[hetsyn_flag]   = substr(($0),  9,  2)
  hetsyn_symbol[hetsyn_flag] = substr(($0), 12,  3)
  hetsyn_text[hetsyn_flag]   = substr(($0), 16, 55)

  # Continuation text is concatenated directly, with no separator added.
  if (hetsyn_cont[hetsyn_flag] ~ /^ *$/) {
    het_site_syn[hetsyn_symbol[hetsyn_flag]] = hetsyn_text[hetsyn_flag]
  } else {
    het_site_syn[hetsyn_symbol[hetsyn_flag]] = \
      (het_site_syn[hetsyn_symbol[hetsyn_flag]] hetsyn_text[hetsyn_flag])
  }
  ++hetsyn_flag
}

#===========================================================================
# Keyword HYDBND
#
# Introduced with the February 1996 PDB format
#
# There is no way to define the hydrogen atom of a hydrogen bond in mmCIF
# we treat it as a partner in a hydrogen bond with role "hydrogen"
#
#
#   hb_atom_beg         [13-16] = _struct_conn.ptnr1_label_atom_id
#   hb_alt_loc_beg      [17]    = _struct_conn.ptnr1_label_alt_id
#   hb_res_name_beg     [18-20] = _struct_conn.ptnr1_label_comp_id
#   hb_chain_id_beg     [22]    =
#   (hb_chain_id_beg)           = _struct_conn.ptnr1_label_asym_id
#   hb_res_seq_num_beg  [24-27] = _struct_conn.ptnr1_auth_seq_id
#   hb_icode_beg        [28]      append to
#                                 _struct_conn.ptnr1_auth_seq_id
#   hb_name_ha          [30-33] = _struct_conn.ptnr1_label_atom_id
#   hb_alt_loc_ha       [34]    = _struct_conn.ptnr1_label_alt_id
#   hb_chain_id_ha      [36]    = _struct_conn.ptnr1_label_asym_id
#   hb_res_seq_num_ha   [37-41] = _struct_conn.ptnr1_auth_seq_id
#   hb_icode_ha         [42]      append to
#                                 _struct_conn.ptnr1_auth_seq_id
#   hb_atom_end         [44-47] = _struct_conn.ptnr2_label_atom_id
#   hb_alt_loc_end      [48]    = _struct_conn.ptnr2_label_alt_id
#   hb_res_name_end     [49-51] = _struct_conn.ptnr2_label_comp_id
#   hb_chain_id_end     [53]    = _struct_conn.ptnr2_label_asym_id
#   hb_res_seq_num_end  [55-58] = _struct_conn.ptnr2_auth_seq_id
#   hb_icode_end        [59]      append to
#                                 _struct_conn.ptnr2_auth_seq_id
#   hb_symop1           [60-65] = _struct_conn.ptnr1_symmetry
#   hb_symop2           [67-72] = _struct_conn.ptnr2_symmetry
#
# NOTE(review): the code below reads hb_res_seq_num_ha from columns 38-42,
# not 37-41 as listed above -- confirm which is intended.
#

first_field == "HYDBND" {
  xlat_flag = xlat_save

  # Sequence numbers are read five wide, i.e. including the insertion code.
  hb_atom_beg[++hydbnd_flag]      = substr(($0), 13, 4)
  hb_alt_loc_beg[hydbnd_flag]     = substr(($0), 17, 1)
  hb_res_name_beg[hydbnd_flag]    = substr(($0), 18, 3)
  hb_chain_id_beg[hydbnd_flag]    = substr(($0), 22, 1)
  hb_res_seq_num_beg[hydbnd_flag] = substr(($0), 24, 5)
  hb_atom_ha[hydbnd_flag]         = substr(($0), 30, 4)
  hb_alt_loc_ha[hydbnd_flag]      = substr(($0), 34, 1)
  hb_chain_id_ha[hydbnd_flag]     = substr(($0), 36, 1)
  hb_res_seq_num_ha[hydbnd_flag]  = substr(($0), 38, 5)
  hb_atom_end[hydbnd_flag]        = substr(($0), 44, 4)
  hb_alt_loc_end[hydbnd_flag]     = substr(($0), 48, 1)
  hb_res_name_end[hydbnd_flag]    = substr(($0), 49, 3)
  hb_chain_id_end[hydbnd_flag]    = substr(($0), 53, 1)
  hb_res_seq_num_end[hydbnd_flag] = substr(($0), 55, 5)
  hb_symm_1[hydbnd_flag]          = substr(($0), 60, 6)
  hb_symm_2[hydbnd_flag]          = substr(($0), 67, 6)

  #
  # fix up atom_name by squeezing out blanks in the middle
  # (the same steps are repeated inline for each of the three atoms;
  # presumably expanded from one m4 macro -- see the file header)
  #
  # -- donor/acceptor atom at the "beg" end
  temp_a_name = hb_atom_beg[hydbnd_flag]
  if (substr(temp_a_name, 3, 1) == " ") {
    temp_a_name = (substr(temp_a_name, 1, 2) substr(temp_a_name, 4, 1) " ")
  }
  if (substr(temp_a_name, 2, 1) == " ") {
    temp_a_name = (" " substr(temp_a_name, 1, 1) substr(temp_a_name, 3, 2))
  }
  if (temp_a_name ~ /^ *$/) temp_a_name = " . "
  ret_val = temp_a_name
  temp_name = ret_val
  hb_atom_beg[hydbnd_flag] = temp_name

  # -- atom at the "end" end
  temp_a_name = hb_atom_end[hydbnd_flag]
  if (substr(temp_a_name, 3, 1) == " ") {
    temp_a_name = (substr(temp_a_name, 1, 2) substr(temp_a_name, 4, 1) " ")
  }
  if (substr(temp_a_name, 2, 1) == " ") {
    temp_a_name = (" " substr(temp_a_name, 1, 1) substr(temp_a_name, 3, 2))
  }
  if (temp_a_name ~ /^ *$/) temp_a_name = " . "
  ret_val = temp_a_name
  temp_name = ret_val
  hb_atom_end[hydbnd_flag] = temp_name

  # -- hydrogen atom
  temp_a_name = hb_atom_ha[hydbnd_flag]
  if (substr(temp_a_name, 3, 1) == " ") {
    temp_a_name = (substr(temp_a_name, 1, 2) substr(temp_a_name, 4, 1) " ")
  }
  if (substr(temp_a_name, 2, 1) == " ") {
    temp_a_name = (" " substr(temp_a_name, 1, 1) substr(temp_a_name, 3, 2))
  }
  if (temp_a_name ~ /^ *$/) temp_a_name = " . "
  ret_val = temp_a_name
  temp_name = ret_val
  hb_atom_ha[hydbnd_flag] = temp_name

  # Placeholders for blank fields.  The tests are width-independent so
  # that multi-column fields and right-trimmed records are handled alike.
  if (hb_alt_loc_beg[hydbnd_flag] ~ /^ *$/) hb_alt_loc_beg[hydbnd_flag] = "."
  if (hb_alt_loc_end[hydbnd_flag] ~ /^ *$/) hb_alt_loc_end[hydbnd_flag] = "."
  if (hb_alt_loc_ha[hydbnd_flag] ~ /^ *$/)  hb_alt_loc_ha[hydbnd_flag] = "."

  if (hb_res_name_beg[hydbnd_flag] ~ /^ *$/) hb_res_name_beg[hydbnd_flag] = " . "
  if (hb_res_name_end[hydbnd_flag] ~ /^ *$/) hb_res_name_end[hydbnd_flag] = " . "

  if (hb_res_seq_num_beg[hydbnd_flag] ~ /^ *$/) \
    hb_res_seq_num_beg[hydbnd_flag] = " . "
  if (hb_res_seq_num_end[hydbnd_flag] ~ /^ *$/) \
    hb_res_seq_num_end[hydbnd_flag] = " . "
  if (hb_res_seq_num_ha[hydbnd_flag] ~ /^ *$/) \
    hb_res_seq_num_ha[hydbnd_flag] = " . "

  # A blank chain takes the value of bcid, which is set outside this rule.
  if (hb_chain_id_beg[hydbnd_flag] ~ /^ *$/) hb_chain_id_beg[hydbnd_flag] = bcid
  if (hb_chain_id_end[hydbnd_flag] ~ /^ *$/) hb_chain_id_end[hydbnd_flag] = bcid
  if (hb_chain_id_ha[hydbnd_flag] ~ /^ *$/)  hb_chain_id_ha[hydbnd_flag] = bcid

  # Symmetry operators: the six columns are split 3 + 3 around a "_".
  if (hb_symm_1[hydbnd_flag] ~ /^ *$/) {
    hb_symm_1[hydbnd_flag] = " . "
  } else {
    hb_symm_1[hydbnd_flag] = \
      (substr(hb_symm_1[hydbnd_flag], 1, 3) "_" \
       substr(hb_symm_1[hydbnd_flag], 4, 3))
  }
  if (hb_symm_2[hydbnd_flag] ~ /^ *$/) {
    hb_symm_2[hydbnd_flag] = " . "
  } else {
    hb_symm_2[hydbnd_flag] = \
      (substr(hb_symm_2[hydbnd_flag], 1, 3) "_" \
       substr(hb_symm_2[hydbnd_flag], 4, 3))
  }
}

#==================================================================
# Keyword JRNL
#
# As defined by the PDB, this is the primary citation that matches the
# given coordinate set. It is written before the REMARK 2 record
#
#   "primary"               = _citation.id
#                           = _citation_author.citation_id
#   "yes"/"no"              = _citation.coordinate_linkage
#   jrnl_rec_type  [13-16]  =
#   jrnl_cont      [17-18]  =
#
#AUTH
#   jrnl_auth      [20-70]  = _citation_author.name
#
#TITL
#   jrnl_titl      [20-70]  = _citation.title
#
#REF
#   jrnl_ref_jour  [20-47]  = _citation.journal_abbrev (this is not
#                             always abbreviated but it will do)
#   jrnl_ref_vol   [53-55]  = _citation.journal_volume
#   "?"                     = _citation.journal_issue
#   jrnl_ref_page  [57-61]  = _citation.page_first
#   "?"                     = _citation.page_last
#   jrnl_ref_year  [63-66]  = _citation.year
#
#PUBL
#   jrnl_pub_pub   [20-70]  = _citation.book_publisher
#
#REFN
#   jrnl_astm      [25-30]  = _citation.journal_id_ASTM
#   jrnl_country   [33-34]  = _citation.country
#   jrnl_isbn      [41-65]  = _citation.journal_id_ISSN or
#                           = _citation.book_id_ISBN
#   "?"
#   ("?")                   = _citation.abstract
#
#                           = _citation.details

first_field == "JRNL" {
  xlat_flag = xlat_save
  flush_refs = 1

  jrnl_rec_type = substr(($0), 13,  4)
  jrnl_cont     = substr(($0), 17,  2)
  text          = substr(($0), 20, 51)
  # Author text is kept as read; only the title goes through typesetting.
  jrnl_auth = text

  if (convtext == "yes") {
    #
    # apply PDB typesetting codes if any to a line
    #
    # Pass 1: fold to lower case.  index() yields 1..26 for an upper-case
    # letter and 27 (the appended character itself) for anything else.
    tx_tl = text
    lx_tl = length(tx_tl)
    lostr = ""
    for (ix_tl = 1; ix_tl <= lx_tl; ++ix_tl) {
      cx_tl = substr(tx_tl, ix_tl, 1)
      cx_tl = substr((lcaz cx_tl), index((UCAZ cx_tl), cx_tl), 1)
      lostr = (lostr cx_tl)
    }

    # Pass 2: capitalise after a blank , . ( * or / and drop the code
    # characters * $ / unless the same character is repeated.
    lstr = length(lostr)
    mystr = ""
    pchar = " "
    for (qtsi = 1; qtsi <= lstr; ++qtsi) {
      mychar = substr(lostr, qtsi, 1)
      if (index(" ,.(*/", pchar) > 0) {
        mychar = substr((UCAZ mychar), index((lcaz mychar), mychar), 1)
      }
      if (index("*$/", mychar) == 0 || mychar == pchar) {
        mystr = (mystr mychar)
      }
      # After a "/" the previous-character state is held until "$" or "-".
      if (pchar == "/") {
        if (mychar == "$" || mychar == "-") {
          pchar = mychar
        }
      } else {
        pchar = mychar
      }
    }
    ret_val = mystr
    text = ret_val
  }
  jrnl_title = text

  # JRNL always describes citation number 1, the primary one.
  cit_flag = 1
  primary = "yes"
  cit_refNum[cit_flag] = "primary"

  # A blank continuation field marks the first line of a sub-record.  The
  # test is width-independent so right-trimmed records are also handled.
  jrnl_first = "no"
  if (jrnl_cont ~ /^ *$/) jrnl_first = "yes"

  if (jrnl_rec_type == "TITL") {
    if (jrnl_first == "yes") {
      cit_title_1[cit_flag] = jrnl_title
      cit_title_2[cit_flag] = ""
    } else if (cit_title_2[cit_flag] == "") {
      cit_title_2[cit_flag] = jrnl_title
    } else {
      cit_title_2[cit_flag] = (cit_title_2[cit_flag] "\n " jrnl_title)
    }
  }

  if (jrnl_rec_type == "AUTH") {
    if (jrnl_first == "yes") {
      cit_auth_1[cit_flag] = jrnl_auth
      cit_auth_2[cit_flag] = ""
    } else {
      cit_auth_2[cit_flag] = (cit_auth_2[cit_flag] jrnl_auth)
    }
  }

  if (jrnl_rec_type == "REF ") {
    if (jrnl_first == "yes") {
      jour_1[cit_flag] = substr (($0), 20, 28)
      jour_2[cit_flag] = ""
      volu[cit_flag]   = substr (($0), 52,  4)
      page[cit_flag]   = substr (($0), 57,  5)
      year[cit_flag]   = substr (($0), 63,  4)
      jrnl_pub_pub_1[cit_flag] = "?"
    } else if (jour_2[cit_flag] == "") {
      jour_2[cit_flag] = substr (($0), 20, 28)
    } else {
      jour_2[cit_flag] = (jour_2[cit_flag] "\n " substr (($0), 20, 28))
    }
  }

  if (jrnl_rec_type == "PUBL") {
    if (jrnl_first == "yes") {
      # A publisher line overrides the journal reference fields.
      jrnl_pub_pub[cit_flag] = substr (($0), 20, 51)
      jour_1[cit_flag] = "?"
      if (volu[cit_flag] ~ /^ *$/) volu[cit_flag] = "?"
      page[cit_flag] = "?"
      year[cit_flag] = "?"
      jrnl_pub_pub_1[cit_flag] = substr (($0), 20, 51)
      jrnl_pub_pub_2[cit_flag] = ""
    } else if (jrnl_pub_pub_2[cit_flag] == "") {
      jrnl_pub_pub_2[cit_flag] = substr (($0), 20, 51)
    } else {
      jrnl_pub_pub_2[cit_flag] = \
        (jrnl_pub_pub_2[cit_flag] "\n " substr (($0), 20, 51))
    }
  }

  if (jrnl_rec_type == "REFN") {
    astm[cit_flag]      = substr (($0), 25, 6)
    country[cit_flag]   = substr (($0), 33, 2)
    issn_isbn[cit_flag] = substr (($0), 36, 4)

    # When the ISSN/ISBN tag is blank and the reference is not "TO BE
    # PUB...", a publisher or a missing volume makes it an ISBN.
    if (issn_isbn[cit_flag] ~ /^ *$/ && \
        substr(jour_1[cit_flag], 1, 9) != "TO BE PUB") {
      if (jrnl_pub_pub_1[cit_flag] != "?") {
        issn_isbn[cit_flag] = "ISBN"
      }
      if (volu[cit_flag] ~ /^ *$/ || volu[cit_flag] == "?") {
        issn_isbn[cit_flag] = "ISBN"
      }
    }
    if (issn_isbn[cit_flag] != "ISBN") {
      isbn[cit_flag] = "?"
      issn[cit_flag] = substr (($0), 41, 25)
    } else {
      issn[cit_flag] = "?"
      isbn[cit_flag] = substr (($0), 41, 25)
    }
    csd[cit_flag] = substr (($0), 67, 4)
    if (csd[cit_flag] ~ /^ *$/) csd[cit_flag] = "?"
  }
}

#====================================================================
# Keyword KEYWRD
#
#   keywrd_list  [11-70] == _struct_keywords.text
#   from HEADER:
#   head_PDB_code        == _struct_keywords.entry_id

first_field == "KEYWRD" || first_field == "KEYWDS" {
  xlat_flag = xlat_save
  keywrd_list = substr (($0), 11, 60)
  #
  # First keyword record: queue the category banner and loop header.
  if (keywrd_flag == 0) {
    # key_tail is the variable tested below (not keywrd_tail).
    key_tail = ""
    key_save[++keywrd_flag] = "\n\n\n"
    key_save[++keywrd_flag] = "#############################\n"
    key_save[++keywrd_flag] = "# #\n"
    key_save[++keywrd_flag] = "# STRUCT_KEYWORDS #\n"
    key_save[++keywrd_flag] = "# #\n"
    key_save[++keywrd_flag] = "#############################\n\n"
    key_save[++keywrd_flag] = "loop_\n"
    key_save[++keywrd_flag] = "_struct_keywords.entry_id\n"
    key_save[++keywrd_flag] = "_struct_keywords.text\n"
  }

  # The last keyword of the previous record may continue on this one:
  # un-queue it and put its text in front of this record's list.
  if (key_tail != "") {
    --keywrd_flag
    keywrd_list = (key_tail keywrd_list)
  }

  num_keys = split(keywrd_list, keyslist, comma)
  for (ii = 1; ii <= num_keys; ++ii) {
    # Trim each keyword and squeeze inner runs of blanks.
    num_k_split = split(keyslist[ii], keys_split, " ")
    keyslist[ii] = ""
    if (num_k_split > 0) {
      keyslist[ii] = keys_split[1]
      for (j = 2; j <= num_k_split; ++j) {
        keyslist[ii] = (keyslist[ii] " " keys_split[j])
      }
      key_save[++keywrd_flag] = \
        sprintf("%4s '%s'\n", head_PDB_code, keyslist[ii])
    }
  }

  # Remember the last keyword; it is empty when the record ended in a comma.
  key_tail = ""
  if (num_keys > 0) key_tail = keyslist[num_keys]
}

#===========================================================================
# Keyword LINK
#
# Introduced with the February 1996 PDB format
#
#   lk_atom_beg         [13-16] = _struct_conn.ptnr1_label_atom_id
#   lk_alt_loc_beg      [17]    = _struct_conn.ptnr1_label_alt_id
#   lk_res_name_beg     [18-20] = _struct_conn.ptnr1_label_comp_id
#   lk_chain_id_beg     [22]    = _struct_conn.ptnr1_label_asym_id
#   lk_res_seq_num_beg  [23-26] = _struct_conn.ptnr1_auth_seq_id
#   lk_icode_beg        [27]      append to
#                                 _struct_conn.ptnr1_auth_seq_id
#   lk_atom_end         [43-46] = _struct_conn.ptnr2_label_atom_id
#   lk_alt_loc_end      [47]    = _struct_conn.ptnr2_label_alt_id
#   lk_res_name_end     [48-50] = _struct_conn.ptnr2_label_comp_id
#   lk_chain_id_end     [52]    = _struct_conn.ptnr2_label_asym_id
#   lk_res_seq_num_end  [53-56] = _struct_conn.ptnr2_auth_seq_id
#   lk_icode_end        [57]      append to
#                                 _struct_conn.ptnr2_auth_seq_id
#   lk_symop1           [60-65] = _struct_conn.ptnr1_symmetry
#   lk_symop2           [67-72] = _struct_conn.ptnr2_symmetry
#

first_field == "LINK" {
  xlat_flag = xlat_save

  # Sequence numbers are read five wide, i.e. including the insertion code.
  lk_atom_beg[++link_flag]      = substr(($0), 13, 4)
  lk_alt_loc_beg[link_flag]     = substr(($0), 17, 1)
  lk_res_name_beg[link_flag]    = substr(($0), 18, 3)
  lk_chain_id_beg[link_flag]    = substr(($0), 22, 1)
  lk_res_seq_num_beg[link_flag] = substr(($0), 23, 5)
  lk_atom_end[link_flag]        = substr(($0), 43, 4)
  lk_alt_loc_end[link_flag]     = substr(($0), 47, 1)
  lk_res_name_end[link_flag]    = substr(($0), 48, 3)
  lk_chain_id_end[link_flag]    = substr(($0), 52, 1)
  lk_res_seq_num_end[link_flag] = substr(($0), 53, 5)
  lk_symm_1[link_flag]          = substr(($0), 60, 6)
  lk_symm_2[link_flag]          = substr(($0), 67, 6)

  #
  # fix up atom_name by squeezing out blanks in the middle
  # (the same steps are repeated inline for both atoms)
  #
  # -- first partner
  temp_a_name = lk_atom_beg[link_flag]
  if (substr(temp_a_name, 3, 1) == " ") {
    temp_a_name = (substr(temp_a_name, 1, 2) substr(temp_a_name, 4, 1) " ")
  }
  if (substr(temp_a_name, 2, 1) == " ") {
    temp_a_name = (" " substr(temp_a_name, 1, 1) substr(temp_a_name, 3, 2))
  }
  if (temp_a_name ~ /^ *$/) temp_a_name = " . "
  ret_val = temp_a_name
  temp_name = ret_val
  lk_atom_beg[link_flag] = temp_name

  # -- second partner
  temp_a_name = lk_atom_end[link_flag]
  if (substr(temp_a_name, 3, 1) == " ") {
    temp_a_name = (substr(temp_a_name, 1, 2) substr(temp_a_name, 4, 1) " ")
  }
  if (substr(temp_a_name, 2, 1) == " ") {
    temp_a_name = (" " substr(temp_a_name, 1, 1) substr(temp_a_name, 3, 2))
  }
  if (temp_a_name ~ /^ *$/) temp_a_name = " . "
  ret_val = temp_a_name
  temp_name = ret_val
  lk_atom_end[link_flag] = temp_name

  # Placeholders for blank fields.  The tests are width-independent so
  # that multi-column fields and right-trimmed records are handled alike.
  if (lk_alt_loc_beg[link_flag] ~ /^ *$/) lk_alt_loc_beg[link_flag] = "."
  if (lk_alt_loc_end[link_flag] ~ /^ *$/) lk_alt_loc_end[link_flag] = "."

  if (lk_res_name_beg[link_flag] ~ /^ *$/) lk_res_name_beg[link_flag] = " . "
  if (lk_res_name_end[link_flag] ~ /^ *$/) lk_res_name_end[link_flag] = " . "

  # A blank chain takes the value of bcid, which is set outside this rule.
  if (lk_chain_id_beg[link_flag] ~ /^ *$/) lk_chain_id_beg[link_flag] = bcid
  if (lk_chain_id_end[link_flag] ~ /^ *$/) lk_chain_id_end[link_flag] = bcid

  # Symmetry operators: the six columns are split 3 + 3 around a "_".
  if (lk_symm_1[link_flag] ~ /^ *$/) {
    lk_symm_1[link_flag] = " . "
  } else {
    lk_symm_1[link_flag] = \
      (substr(lk_symm_1[link_flag], 1, 3) "_" \
       substr(lk_symm_1[link_flag], 4, 3))
  }
  if (lk_symm_2[link_flag] ~ /^ *$/) {
    lk_symm_2[link_flag] = " . "
  } else {
    lk_symm_2[link_flag] = \
      (substr(lk_symm_2[link_flag], 1, 3) "_" \
       substr(lk_symm_2[link_flag], 4, 3))
  }
}

#=============================================================================
# Keyword MASTER
#
# (used in END statement)

first_field == "MASTER" {
  xlat_flag = xlat_save

  # parse totals: twelve consecutive five-column counts from column 11
  total_remark = substr(($0), 11, 5)
  total_ftnote = substr(($0), 16, 5)
  total_het    = substr(($0), 21, 5)
  total_helix  = substr(($0), 26, 5)
  total_sheet  = substr(($0), 31, 5)
  total_turn   = substr(($0), 36, 5)
  total_site   = substr(($0), 41, 5)
  total_o_s_m  = substr(($0), 46, 5)
  total_a_h    = substr(($0), 51, 5)
  total_ter    = substr(($0), 56, 5)
  total_conect = substr(($0), 61, 5)
  total_seqres = substr(($0), 66, 5)
}

#=============================================================================
# Keyword MODEL

first_field == "MODEL" {
  xlat_flag = xlat_save

  # The model id is the second whitespace-separated field; "." if absent.
  model_flag = $2
  model_flags = "yes"
  if (model_flag == "") model_flag = "."
}

#===========================================================================
# Keyword MODRES
#
# In the 1995 format, a new record, MODRES, was added to provide
# "descriptions of modifications (e.g., chemical or post-
# translational) to protein and nucleic acid residues. Included
# are mapping between residue names given in a PDB entry
# and standard residues." We treat this record as if it were
# a SEQADV with no database specified.
#  To complete the necessary
#  mmCIF category relationships, a dummy DBREF is created for
#  each chain involved.
#
#  MODRES          [1- 6]
#  modres_idcode   [8-11]  = idcode of this entry (not used)
#  modres_resName  [13-15] = _struct_ref_seq_dif.mon_id
#  modres_chainID  [ 17 ]  = _struct_ref.biol_id
#  modres_seq      [19-23] = combines seqNum and insertCode
#                            used to derive
#                            _struct_ref_seq_dif.seq_num
#  "."                     = _struct_ref.db_name
#  "."                     = _struct_ref.db_code
#  modres_dbRes    [25-27] = _struct_ref_seq_dif.db_mon_id
#  "."                     = _struct_ref_seq_dif.db_seq_num
#  modres_conflict [30-70] = _struct_ref_seq_dif.details
#
#  The rule only stores the fields in arrays indexed by modres_flag;
#  nothing is printed here.
{
  if (first_field == "MODRES") {
    xlat_flag=xlat_save
    # modres_flag is advanced once per record by the first assignment.
    modres_resName[++modres_flag] = substr( ($0), 13, 3)
    modres_chainID[modres_flag] = substr( ($0), 17, 1)
    # a blank chain id is replaced by bcid (set elsewhere in the file)
    if (modres_chainID[modres_flag] == " ") \
      modres_chainID[modres_flag] = bcid
    modres_seq[modres_flag] = substr( ($0), 19, 5)
    modres_dbRes[modres_flag] = substr( ($0), 25, 3)
    modres_conflict[modres_flag] = substr( ($0), 30, 41)
    # MODRES carries no database sequence number, so the entry is
    # always the "." placeholder listed in the table above.  The
    # original tested the still-unassigned array element against a
    # blank, which can never match, and so left it empty.
    modres_dbSeq[modres_flag] = "."
    # Prefix the comment with the chain, then collapse runs of blanks.
    modres_conflict[modres_flag] = \
      ("Chain " modres_chainID[modres_flag] ": " \
      modres_conflict[modres_flag] )
    numx = split(modres_conflict[modres_flag],dblist," ")
    modres_conflict[modres_flag] = "."
    if (numx > 0) {
      modres_conflict[modres_flag] = dblist[1]
      for (j=2; j <= numx; ++j) {
        modres_conflict[modres_flag] = \
          ( modres_conflict[modres_flag] " " dblist[j] )
      }
      # more than one word: wrap the value in single quotes
      if (numx >1) modres_conflict[modres_flag] = \
        ("'" modres_conflict[modres_flag] "'")
    }
  }
}
#============================================================================
# Keyword MTRIX
#
#  Queues the STRUCT_NCS_OPER loop in mat_save[] (indexed by
#  mtrix_flag): the banner and loop header on the first MTRIXn record,
#  an "id code" line for each MTRIX1 record, and one line holding the
#  three matrix elements and the vector element for every MTRIXn record.
#
{
  if (first_field == "MTRIX1" || first_field == "MTRIX2" \
      || first_field == "MTRIX3") {
    xlat_flag=xlat_save
    mtrix_col1 = substr ( ($0), 11, 10)
    mtrix_col2 = substr ( ($0), 21, 10)
    mtrix_col3 = substr ( ($0), 31, 10)
    mtrix_col4 = substr ( ($0), 46, 10)
    # print loop headers
    if (mtrix_flag == "0") {
      mat_save[++mtrix_flag] = "\n\n\n"
      mat_save[++mtrix_flag] = "##############################\n"
      mat_save[++mtrix_flag] = "# #\n"
      mat_save[++mtrix_flag] = "# STRUCT_NCS_OPER #\n"
      mat_save[++mtrix_flag] = "# #\n"
      mat_save[++mtrix_flag] = "##############################\n"
      mat_save[++mtrix_flag] = "\n"
      mat_save[++mtrix_flag] = "# **** WARNING **** Domain information needed \n"
      # the same warning is also recorded in the global warning list
      warning_list[++warning_flag] = \
        "#=# STRUCT_NCS_OPER: Domain information needed\n"
      mat_save[++mtrix_flag] = "\nloop_ \n"
      mat_save[++mtrix_flag] = "_struct_ncs_oper.id\n"
      mat_save[++mtrix_flag] = "_struct_ncs_oper.code\n"
      mat_save[++mtrix_flag] = "_struct_ncs_oper.matrix[1][1]\n"
      mat_save[++mtrix_flag] = "_struct_ncs_oper.matrix[1][2]\n"
      mat_save[++mtrix_flag] = "_struct_ncs_oper.matrix[1][3]\n"
      mat_save[++mtrix_flag] = "_struct_ncs_oper.vector[1] \n"
      mat_save[++mtrix_flag] = "_struct_ncs_oper.matrix[2][1]\n"
      mat_save[++mtrix_flag] = "_struct_ncs_oper.matrix[2][2]\n"
      mat_save[++mtrix_flag] = "_struct_ncs_oper.matrix[2][3]\n"
      mat_save[++mtrix_flag] = "_struct_ncs_oper.vector[2] \n"
      mat_save[++mtrix_flag] = "_struct_ncs_oper.matrix[3][1]\n"
      mat_save[++mtrix_flag] = "_struct_ncs_oper.matrix[3][2]\n"
      mat_save[++mtrix_flag] = "_struct_ncs_oper.matrix[3][3]\n"
      mat_save[++mtrix_flag] = "_struct_ncs_oper.vector[3] \n"
    }
    mtrix_id = substr( ($0), 8, 3)
    # column 60 non-blank -> code "given", otherwise "generate"
    mtrix_given = substr( ($0),60, 1)
    x_given = "generate"
    if (mtrix_given != " ") x_given = "given"
    if (first_field == "MTRIX1") {
      mat_save[++mtrix_flag]=sprintf ("%3s %s\n", mtrix_id, x_given)
    }
    mat_save[++mtrix_flag] = (mtrix_col1 " " \
      mtrix_col2 " " \
      mtrix_col3 " " \
      mtrix_col4 "\n")
  }
}
#===========================================================================
# Keyword OBSLTE: see SPRSDE, below
#
#============================================================================
# Keyword ORIGX
#
#  _database_pdb_matrix.origx[1][1] .. [3][3]
#  _database_pdb_matrix.origx_vector[1] .. _3
#
#  Queues the _database_pdb_matrix loop in om_save[] (indexed by
#  origx_flag): header and entry id on the first ORIGXn record, then
#  one line of three matrix elements and the vector element per record.
{
  if (first_field == "ORIGX1" || first_field == "ORIGX2" \
      || first_field == "ORIGX3") {
    xlat_flag=xlat_save
    origx_col1 = substr ( ($0), 11, 10)
    origx_col2 = substr ( ($0), 21, 10)
    origx_col3 = substr ( ($0), 31, 10)
    origx_col4 = substr ( ($0), 46, 10)
    # print loop headers
    if (origx_flag == "0") {
      om_save[++origx_flag] = "\n\n\n"
      om_save[++origx_flag] = "\nloop_ \n"
      om_save[++origx_flag] = "_database_pdb_matrix.entry_id\n"
      om_save[++origx_flag] = "_database_pdb_matrix.origx[1][1]\n"
      om_save[++origx_flag] = "_database_pdb_matrix.origx[1][2]\n"
      om_save[++origx_flag] = "_database_pdb_matrix.origx[1][3]\n"
      om_save[++origx_flag] = "_database_pdb_matrix.origx_vector[1] \n"
      om_save[++origx_flag] = "_database_pdb_matrix.origx[2][1]\n"
      om_save[++origx_flag] = "_database_pdb_matrix.origx[2][2]\n"
      om_save[++origx_flag] = "_database_pdb_matrix.origx[2][3]\n"
      om_save[++origx_flag] = "_database_pdb_matrix.origx_vector[2] \n"
      om_save[++origx_flag] = "_database_pdb_matrix.origx[3][1]\n"
      om_save[++origx_flag] = "_database_pdb_matrix.origx[3][2]\n"
      om_save[++origx_flag] = "_database_pdb_matrix.origx[3][3]\n"
      om_save[++origx_flag] = "_database_pdb_matrix.origx_vector[3] \n"
      om_save[++origx_flag] = (" " head_PDB_code "\n\n")
    }
    # serial-number columns of the record; not used further in this rule
    origx_id = substr( ($0), 8, 3)
    om_save[++origx_flag] = (origx_col1 " " \
      origx_col2 " " \
      origx_col3 " " \
      origx_col4 "\n" )
  }
}
#===========================================================================
# Keyword REMARK
#
#
#  print all citations from JRNL and REMARK 1 records
#
#  First check if it is time to flush references
#
#  This rule has no pattern, so it is evaluated for every record.  When
#  flush_refs is 1 and the current record is either the
#  "REMARK 2 RESOLUTION." line or is neither REMARK nor JRNL, the
#  collected citations (indices 1..cit_flag) are printed as three
#  loops -- _citation, _citation_editor and _citation_author -- and
#  flush_refs is cleared.
#
#  Citation ids: when "primary" is set, citation 1 is printed with the
#  id "primary" and all later ids are shifted down by one (cit_decr).
#
#  The name-conversion code appears four times (editors, editor
#  continuation, authors, author continuation) because it is expanded
#  inline; this script defines no functions.
#
{
  if (flush_refs == 1 ){
    remark_number = substr ( ($0), 8, 3)
    if ((first_field == "REMARK" && \
        remark_number == " 2" && \
        $3 == "RESOLUTION.") || \
        ((first_field != "REMARK") && (first_field !="JRNL")) ) {
      # The loop header is printed only while jrnl_flag is "1"; the
      # increment keeps it from being printed again.
      if (jrnl_flag == "1") {
        printf ("\nloop_\n")
        printf ("_citation.id\n")
        printf ("_citation.coordinate_linkage\n")
        printf ("_citation.title\n")
        printf ("_citation.country\n")
        printf ("_citation.journal_abbrev\n")
        printf ("_citation.journal_volume\n")
        printf ("_citation.journal_issue\n")
        printf ("_citation.page_first\n")
        printf ("_citation.year\n")
        printf ("_citation.journal_id_ASTM\n")
        printf ("_citation.journal_id_ISSN\n")
        printf ("_citation.journal_id_CSD\n")
        printf ("_citation.book_title\n")
        printf ("_citation.book_publisher\n")
        printf ("_citation.book_id_ISBN\n")
        printf ("_citation.details\n")
        ++jrnl_flag
      }
      cit_decr = 0
      if (primary) cit_decr = 1
      for (i=1; i <= cit_flag; ++i) {
        if (i == "1" && primary) {
          printf (" \nprimary yes\n")
        } else {
          printf (" \n%3s no\n", i-cit_decr)
          # warn when the PDB reference number differs from the id used
          if (i-cit_decr != cit_refNum[i]) {
            warning_list[++warning_flag] = \
              sprintf ("#=# CITATION: Mismatch PDB refNum %s to id %s\n", \
              cit_refNum[i],i-cit_decr)
          }
        }
        # for books
        # _citation.title == TITL (if present)
        # _citation.country == country[i]
        # _citation.journal_abbrev == ?
        # _citation.journal_volume == volu[i]
        # _citation.journal_issue == ?
        # _citation.page_first == ?
        # _citation.year == year[i]
        # _citation.journal_id_ASTM == ?
        # _citation.journal_id_ISSN == ?
        # _citation.journal_id_PDB == ?
        # _citation.book_title == REF (jour_x)
        # _citation.book_publisher == jrnl_pub_pub_x[i]
        # _citation.book_id_ISBN == isbn[i]
        # _citation.details == ?
        if (jrnl_pub_pub_1[i] != "?" ||\
            issn_isbn[i] == "ISBN") {
          if (country[i] == " ") country[i] = "?"
          if (jour_1[i] == " ") \
            jour_1[i] = "?"
          if (volu[i] == " ") volu[i] = "?"
          if (year[i] == " ") year[i] = "?"
          if (page[i] == " ") page[i] = "?"
          # title as a ";"-delimited text field, or "?" when absent
          if (cit_title_1[i]) printf ("; %s\n", cit_title_1[i])
          else printf (" ? ")
          if (cit_title_2[i]) printf (" %s\n", cit_title_2[i])
          if (cit_title_1[i]) printf (";\n")
          printf (" %2s ? %3s ? %5s %4s ? ? %4s\n" \
            ,country[i],volu[i], page[i], year[i], csd[i])
          if (!jour_2[i]) {
            printf (" '%28s' \n", jour_1[i])
          }
          if(jour_2[i]) {
            printf ("; %28s\n %s\n;\n",jour_1[i],jour_2[i])}
          if (jrnl_pub_pub_1[i]) printf ("; %s \n", jrnl_pub_pub_1[i])
          if (jrnl_pub_pub_2[i]) printf (" %s \n", jrnl_pub_pub_2[i])
          if (jrnl_pub_pub_1[i]) printf (";\n")
          printf (" '%25s' ? \n", isbn[i])
        } else {
          # for journals
          #
          # The title is handled exactly as in the book branch: a
          # ";"-delimited text field when TITL was given, otherwise the
          # placeholder "?".  The original printed the closing ";"
          # unconditionally (as the first characters of the two
          # formats below), so a citation without a title got no title
          # value and a ";" that opened a text field which was never
          # closed.  Output for citations with a title is unchanged.
          if (cit_title_1[i]) {
            printf ("; %s\n", cit_title_1[i])
          } else {
            printf (" ? ")
          }
          if (cit_title_2[i]) printf (" %s\n", cit_title_2[i])
          if (country[i] == " ") country[i] = "?"
          if (volu[i] == " ") volu[i] = "?"
          if (year[i] == " ") year[i] = "?"
          if (page[i] == " ") page[i] = "?"
          if (cit_title_1[i]) printf (";\n")
          if (!jour_2[i]) {
            printf (" %2s '%28s' %4s ? %5s %4s \n'%-15s' '%15s' %4s ? ? ? ?\n",\
              country[i], jour_1[i], volu[i], page[i], year[i], astm[i], \
              issn[i], csd[i])
          }
          if(jour_2[i]) {
            # two-line journal name: printed as its own text field
            printf ((" %2s \n; %-28s\n %s\n;\n"\
              " %4s ? %5s %4s \n'%-15s' '%15s' %4s ? ? ? ?\n"),\
              country[i], jour_1[i], jour_2[i], volu[i], page[i], year[i], astm[i], \
              issn[i], csd[i])
          }
        }
      }
      # Loop Editor List
      # (header is printed once, and only if some citation has editors)
      for (i=1; i <= cit_flag; ++i) {
        if (cit_edit_1[i]) {
          printf ("\nloop_\n")
          printf ("_citation_editor.citation_id\n")
          printf ("_citation_editor.name\n")
          break
        }
      }
      for (i=1; i <= cit_flag; ++i) {
        if (cit_edit_1[i]) {
          num_edit = split(cit_edit_1[i],editors,comma )
          for (ii=1; ii <= num_edit; ++ii){
            # collapse runs of blanks and trim the name
            num_e_split = split(editors[ii],e_split," ")
            editors[ii] = ""
            if (num_e_split > 0) {
              editors[ii] = e_split[1]
              for (j=2; j <= num_e_split; ++j) {
                editors[ii] = (editors[ii] " " e_split[j])
              }
            }
            if (auth_convtext == "yes" || \
                (auth_convtext == "conditional" \
                && convtext == "yes")) {
              {#
              # produce a CIF-style name from a PDB name
              #
              # begin by applying typesetting codes if any
              # but always treat "-" and "'" as breaks for capitalization
              # in names
              #
              # Lower-case the whole name.  index() looks the character
              # up in UCAZ followed by the character itself; substr()
              # then takes the same position from lcaz followed by the
              # character, so letters map to lower case and everything
              # else maps to itself.
              {
                lx_tl = length(editors[ii])
                tx_tl = editors[ii]
                lostr = ""
                for (ix_tl = 1; ix_tl <= lx_tl; ++ix_tl) {
                  cx_tl = substr(tx_tl,ix_tl,1)
                  cx_tl = substr((lcaz cx_tl), \
                    index((UCAZ cx_tl), cx_tl),1)
                  lostr = (lostr cx_tl)
                }
              }
              lstr = length(lostr)
              mystr = ""
              pchar = " "
              # Capitalise a character that follows a break character;
              # drop "*", "$" and "/" unless doubled.
              for( qnsi=1; qnsi <= lstr; ++qnsi) {
                mychar = substr( lostr, qnsi, 1)
                if( pchar == " " || \
                    pchar == "," || \
                    pchar == "." || \
                    pchar == "-" || \
                    pchar == "'" || \
                    pchar == "(" || \
                    pchar == "*" || \
                    pchar == "/" ) {
                  mychar = substr((UCAZ mychar),\
                    index((lcaz mychar), mychar),1)
                }
                if( ( mychar != "*" && mychar != "$" && mychar != "/" ) \
                    || (mychar == pchar) ) mystr = (mystr mychar)
                if( pchar == "/" ){
                  if( mychar == "$" || mychar == "-" ) {
                    pchar = mychar
                  } # end if( mychar == "$" || mychar == "-" )
                } else {
                  pchar = mychar
                } # end if( pchar == "/" )
              } # end for( qnsi=1; qnsi <= lstr; ++qnsi)
              #
              # See if a specific replacement was given
              #
              # (rep_name[] is looked up by the upper-cased name)
              {
                lx_tu = length(mystr)
                tx_tu = mystr
                name_temp = ""
                for (ix_tu = 1; ix_tu <= lx_tu; ++ix_tu) {
                  cx_tu = substr(tx_tu,ix_tu,1)
                  cx_tu = substr((UCAZ cx_tu), \
                    index((lcaz cx_tu), cx_tu),1)
                  name_temp = (name_temp cx_tu)
                }
              }
              if (rep_name[name_temp] != "") mystr = rep_name[name_temp]
              #
              # See if there is a comma in place if so we are done
              #
              if (index(mystr,comma) != 0 ){
                ret_val=mystr
              } else {
                nam_suf = ""
                num_namp = split(mystr,x_namep," ")
                if (num_namp > 1) {
                  # upper-case the last word and look it up as a suffix
                  {
                    lx_tu = length(x_namep[num_namp])
                    tx_tu = x_namep[num_namp]
                    xtemp = ""
                    for (ix_tu = 1; ix_tu <= lx_tu; ++ix_tu) {
                      cx_tu = substr(tx_tu,ix_tu,1)
                      cx_tu = substr((UCAZ cx_tu), \
                        index((lcaz cx_tu), cx_tu),1)
                      xtemp = (xtemp cx_tu)
                    }
                  }
                  if (rep_suffix[xtemp] != ""){
                    if(junior_on_last == "yes") {
                      x_namep[num_namp] = rep_suffix[xtemp]
                    } else {
                      nam_suf = (" " rep_suffix[xtemp] )
                      --num_namp
                    }
                  }
                  mystr = x_namep[1]
                  for (knamp = 2; knamp <= num_namp; ++knamp) {
                    mystr = (mystr " " x_namep[knamp])
                  }
                } # end if (num_namp > 1)
                # Scan backwards for the last "." and move what follows
                # it to the front, followed by a comma and the part up
                # to and including that ".".
                llname = length(mystr)
                cc = ""
                for (kc=llname-1; kc>1; --kc) {
                  cp = cc
                  cc = substr(mystr,kc,1)
                  if (cc == ".") {
                    if (cp != " ") {
                      mystr = (substr(mystr,kc+1,llname-kc) comma " " \
                        substr(mystr,1,kc))
                    } else {
                      mystr = (substr(mystr,kc+2,llname-kc-1) comma " " \
                        substr(mystr,1,kc))
                    } # if (cp != " ")
                    # force the loop to stop after the first hit
                    kc=0
                  } # end if (cc == ".")
                } # for (kc=llname-1; kc>1; --kc)
                mystr = (mystr nam_suf)
                ret_val=mystr
              } # end if (index(mystr,comma) != 0 )
              editors[ii]=ret_val}
            }
          }
          # When a continuation exists, the last piece of the first
          # line is not printed.
          if (cit_edit_2[i]) (num_edit = num_edit-1)
          for (j=1; j<= num_edit; ++j ) {
            if ( (primary) && i == "1" ) printf (" primary '%s' \n", editors[j])
            else printf (" %3s '%s' \n", i-cit_decr, editors[j])
          }
        }
        if (cit_edit_2[i]) {
          # same processing for the continuation text
          num_edit = split(cit_edit_2[i],editors,comma )
          for (ii=1; ii <= num_edit; ++ii){
            num_e_split = split(editors[ii],e_split," ")
            editors[ii] = ""
            if (num_e_split > 0) {
              editors[ii] = e_split[1]
              for (j=2; j <= num_e_split; ++j) {
                editors[ii] = (editors[ii] " " e_split[j])
              }
            }
            if (auth_convtext == "yes" || \
                (auth_convtext == "conditional" \
                && convtext == "yes")) {
              {#
              # produce a CIF-style name from a PDB name
              #
              # begin by applying typesetting codes if any
              # but always treat "-" and "'" as breaks for capitalization
              # in names
              #
              {
                lx_tl = length(editors[ii])
                tx_tl = editors[ii]
                lostr = ""
                for (ix_tl = 1; ix_tl <= lx_tl; ++ix_tl) {
                  cx_tl = substr(tx_tl,ix_tl,1)
                  cx_tl = substr((lcaz cx_tl), \
                    index((UCAZ cx_tl), cx_tl),1)
                  lostr = (lostr cx_tl)
                }
              }
              lstr = length(lostr)
              mystr = ""
              pchar = " "
              for( qnsi=1; qnsi <= lstr; ++qnsi) {
                mychar = substr( lostr, qnsi, 1)
                if( pchar == " " || \
                    pchar == "," || \
                    pchar == "." || \
                    pchar == "-" || \
                    pchar == "'" || \
                    pchar == "(" || \
                    pchar == "*" || \
                    pchar == "/" ) {
                  mychar = substr((UCAZ mychar),\
                    index((lcaz mychar), mychar),1)
                }
                if( ( mychar != "*" && mychar != "$" && mychar != "/" ) \
                    || (mychar == pchar) ) mystr = (mystr mychar)
                if( pchar == "/" ){
                  if( mychar == "$" || mychar == "-" ) {
                    pchar = mychar
                  } # end if( mychar == "$" || mychar == "-" )
                } else {
                  pchar = mychar
                } # end if( pchar == "/" )
              } # end for( qnsi=1; qnsi <= lstr; ++qnsi)
              #
              # See if a specific replacement was given
              #
              {
                lx_tu = length(mystr)
                tx_tu = mystr
                name_temp = ""
                for (ix_tu = 1; ix_tu <= lx_tu; ++ix_tu) {
                  cx_tu = substr(tx_tu,ix_tu,1)
                  cx_tu = substr((UCAZ cx_tu), \
                    index((lcaz cx_tu), cx_tu),1)
                  name_temp = (name_temp cx_tu)
                }
              }
              if (rep_name[name_temp] != "") mystr = rep_name[name_temp]
              #
              # See if there is a comma in place if so we are done
              #
              if (index(mystr,comma) != 0 ){
                ret_val=mystr
              } else {
                nam_suf = ""
                num_namp = split(mystr,x_namep," ")
                if (num_namp > 1) {
                  {
                    lx_tu = length(x_namep[num_namp])
                    tx_tu = x_namep[num_namp]
                    xtemp = ""
                    for (ix_tu = 1; ix_tu <= lx_tu; ++ix_tu) {
                      cx_tu = substr(tx_tu,ix_tu,1)
                      cx_tu = substr((UCAZ cx_tu), \
                        index((lcaz cx_tu), cx_tu),1)
                      xtemp = (xtemp cx_tu)
                    }
                  }
                  if (rep_suffix[xtemp] != ""){
                    if(junior_on_last == "yes") {
                      x_namep[num_namp] = rep_suffix[xtemp]
                    } else {
                      nam_suf = (" " rep_suffix[xtemp] )
                      --num_namp
                    }
                  }
                  mystr = x_namep[1]
                  for (knamp = 2; knamp <= num_namp; ++knamp) {
                    mystr = (mystr " " x_namep[knamp])
                  }
                } # end if (num_namp > 1)
                llname = length(mystr)
                cc = ""
                for (kc=llname-1; kc>1; --kc) {
                  cp = cc
                  cc = substr(mystr,kc,1)
                  if (cc == ".") {
                    if (cp != " ") {
                      mystr = (substr(mystr,kc+1,llname-kc) comma " " \
                        substr(mystr,1,kc))
                    } else {
                      mystr = (substr(mystr,kc+2,llname-kc-1) comma " " \
                        substr(mystr,1,kc))
                    } # if (cp != " ")
                    kc=0
                  } # end if (cc == ".")
                } # for (kc=llname-1; kc>1; --kc)
                mystr = (mystr nam_suf)
                ret_val=mystr
              } # end if (index(mystr,comma) != 0 )
              editors[ii]=ret_val}
            }
          }
          for (j=1; j<= num_edit; ++j ) {
            if ( (primary) && i == "1" ) printf (" primary '%s' \n", editors[j])
            else printf (" %3s '%s' \n", i-cit_decr, editors[j])
          }
        }
      }
      # Loop Author List
      # (header is printed once, and only if some citation has authors)
      for (i=1; i <= cit_flag; ++i) {
        if (cit_auth_1[i]) {
          printf ("\nloop_\n")
          printf ("_citation_author.citation_id\n")
          printf ("_citation_author.name\n")
          break
        }
      }
      for (i=1; i <= cit_flag; ++i) {
        if (cit_auth_1[i]) {
          num_auth = split(cit_auth_1[i],authors,comma )
          for (ii=1; ii <= num_auth; ++ii){
            # collapse runs of blanks and trim the name
            num_a_split = split(authors[ii],a_split," ")
            authors[ii] = ""
            if (num_a_split > 0) {
              authors[ii] = a_split[1]
              for (j=2; j <= num_a_split; ++j) {
                authors[ii] = (authors[ii] " " a_split[j])
              }
            }
            if (auth_convtext == "yes" || \
                (auth_convtext == "conditional" \
                && convtext == "yes")) {
              {#
              # produce a CIF-style name from a PDB name
              #
              # begin by applying typesetting codes if any
              # but always treat "-" and "'" as breaks for capitalization
              # in names
              #
              {
                lx_tl = length(authors[ii])
                tx_tl = authors[ii]
                lostr = ""
                for (ix_tl = 1; ix_tl <= lx_tl; ++ix_tl) {
                  cx_tl = substr(tx_tl,ix_tl,1)
                  cx_tl = substr((lcaz cx_tl), \
                    index((UCAZ cx_tl), cx_tl),1)
                  lostr = (lostr cx_tl)
                }
              }
              lstr = length(lostr)
              mystr = ""
              pchar = " "
              for( qnsi=1; qnsi <= lstr; ++qnsi) {
                mychar = substr( lostr, qnsi, 1)
                if( pchar == " " || \
                    pchar == "," || \
                    pchar == "." || \
                    pchar == "-" || \
                    pchar == "'" || \
                    pchar == "(" || \
                    pchar == "*" || \
                    pchar == "/" ) {
                  mychar = substr((UCAZ mychar),\
                    index((lcaz mychar), mychar),1)
                }
                if( ( mychar != "*" && mychar != "$" && mychar != "/" ) \
                    || (mychar == pchar) ) mystr = (mystr mychar)
                if( pchar == "/" ){
                  if( mychar == "$" || mychar == "-" ) {
                    pchar = mychar
                  } # end if( mychar == "$" || mychar == "-" )
                } else {
                  pchar = mychar
                } # end if( pchar == "/" )
              } # end for( qnsi=1; qnsi <= lstr; ++qnsi)
              #
              # See if a specific replacement was given
              #
              {
                lx_tu = length(mystr)
                tx_tu = mystr
                name_temp = ""
                for (ix_tu = 1; ix_tu <= lx_tu; ++ix_tu) {
                  cx_tu = substr(tx_tu,ix_tu,1)
                  cx_tu = substr((UCAZ cx_tu), \
                    index((lcaz cx_tu), cx_tu),1)
                  name_temp = (name_temp cx_tu)
                }
              }
              if (rep_name[name_temp] != "") mystr = rep_name[name_temp]
              #
              # See if there is a comma in place if so we are done
              #
              if (index(mystr,comma) != 0 ){
                ret_val=mystr
              } else {
                nam_suf = ""
                num_namp = split(mystr,x_namep," ")
                if (num_namp > 1) {
                  {
                    lx_tu = length(x_namep[num_namp])
                    tx_tu = x_namep[num_namp]
                    xtemp = ""
                    for (ix_tu = 1; ix_tu <= lx_tu; ++ix_tu) {
                      cx_tu = substr(tx_tu,ix_tu,1)
                      cx_tu = substr((UCAZ cx_tu), \
                        index((lcaz cx_tu), cx_tu),1)
                      xtemp = (xtemp cx_tu)
                    }
                  }
                  if (rep_suffix[xtemp] != ""){
                    if(junior_on_last == "yes") {
                      x_namep[num_namp] = rep_suffix[xtemp]
                    } else {
                      nam_suf = (" " rep_suffix[xtemp] )
                      --num_namp
                    }
                  }
                  mystr = x_namep[1]
                  for (knamp = 2; knamp <= num_namp; ++knamp) {
                    mystr = (mystr " " x_namep[knamp])
                  }
                } # end if (num_namp > 1)
                llname = length(mystr)
                cc = ""
                for (kc=llname-1; kc>1; --kc) {
                  cp = cc
                  cc = substr(mystr,kc,1)
                  if (cc == ".") {
                    if (cp != " ") {
                      mystr = (substr(mystr,kc+1,llname-kc) comma " " \
                        substr(mystr,1,kc))
                    } else {
                      mystr = (substr(mystr,kc+2,llname-kc-1) comma " " \
                        substr(mystr,1,kc))
                    } # if (cp != " ")
                    kc=0
                  } # end if (cc == ".")
                } # for (kc=llname-1; kc>1; --kc)
                mystr = (mystr nam_suf)
                ret_val=mystr
              } # end if (index(mystr,comma) != 0 )
              authors[ii]=ret_val}
            }
          }
          # When a continuation exists, the last piece of the first
          # line is not printed.
          if (cit_auth_2[i]) (num_auth = num_auth-1)
          for (j=1; j<= num_auth; ++j ) {
            if ( (primary) && i == "1" ) printf (" primary '%s' \n", authors[j])
            else printf (" %3s '%s' \n", i-cit_decr, authors[j])
          }
        }
        if (cit_auth_2[i]) {
          # same processing for the continuation text
          num_auth = split(cit_auth_2[i],authors,comma )
          for (ii=1; ii <= num_auth; ++ii){
            num_a_split = split(authors[ii],a_split," ")
            authors[ii] = ""
            if (num_a_split > 0) {
              authors[ii] = a_split[1]
              for (j=2; j <= num_a_split; ++j) {
                authors[ii] = (authors[ii] " " a_split[j])
              }
            }
            if (auth_convtext == "yes" || \
                (auth_convtext == "conditional" \
                && convtext == "yes")) {
              {#
              # produce a CIF-style name from a PDB name
              #
              # begin by applying typesetting codes if any
              # but always treat "-" and "'" as breaks for capitalization
              # in names
              #
              {
                lx_tl = length(authors[ii])
                tx_tl = authors[ii]
                lostr = ""
                for (ix_tl = 1; ix_tl <= lx_tl; ++ix_tl) {
                  cx_tl = substr(tx_tl,ix_tl,1)
                  cx_tl = substr((lcaz cx_tl), \
                    index((UCAZ cx_tl), cx_tl),1)
                  lostr = (lostr cx_tl)
                }
              }
              lstr = length(lostr)
              mystr = ""
              pchar = " "
              for( qnsi=1; qnsi <= lstr; ++qnsi) {
                mychar = substr( lostr, qnsi, 1)
                if( pchar == " " || \
                    pchar == "," || \
                    pchar == "." || \
                    pchar == "-" || \
                    pchar == "'" || \
                    pchar == "(" || \
                    pchar == "*" || \
                    pchar == "/" ) {
                  mychar = substr((UCAZ mychar),\
                    index((lcaz mychar), mychar),1)
                }
                if( ( mychar != "*" && mychar != "$" && mychar != "/" ) \
                    || (mychar == pchar) ) mystr = (mystr mychar)
                if( pchar == "/" ){
                  if( mychar == "$" || mychar == "-" ) {
                    pchar = mychar
                  } # end if( mychar == "$" || mychar == "-" )
                } else {
                  pchar = mychar
                } # end if( pchar == "/" )
              } # end for( qnsi=1; qnsi <= lstr; ++qnsi)
              #
              # See if a specific replacement was given
              #
              {
                lx_tu = length(mystr)
                tx_tu = mystr
                name_temp = ""
                for (ix_tu = 1; ix_tu <= lx_tu; ++ix_tu) {
                  cx_tu = substr(tx_tu,ix_tu,1)
                  cx_tu = substr((UCAZ cx_tu), \
                    index((lcaz cx_tu), cx_tu),1)
                  name_temp = (name_temp cx_tu)
                }
              }
              if (rep_name[name_temp] != "") mystr = rep_name[name_temp]
              #
              # See if there is a comma in place if so we are done
              #
              if (index(mystr,comma) != 0 ){
                ret_val=mystr
              } else {
                nam_suf = ""
                num_namp = split(mystr,x_namep," ")
                if (num_namp > 1) {
                  {
                    lx_tu = length(x_namep[num_namp])
                    tx_tu = x_namep[num_namp]
                    xtemp = ""
                    for (ix_tu = 1; ix_tu <= lx_tu; ++ix_tu) {
                      cx_tu = substr(tx_tu,ix_tu,1)
                      cx_tu = substr((UCAZ cx_tu), \
                        index((lcaz cx_tu), cx_tu),1)
                      xtemp = (xtemp cx_tu)
                    }
                  }
                  if (rep_suffix[xtemp] != ""){
                    if(junior_on_last == "yes") {
                      x_namep[num_namp] = rep_suffix[xtemp]
                    } else {
                      nam_suf = (" " rep_suffix[xtemp] )
                      --num_namp
                    }
                  }
                  mystr = x_namep[1]
                  for (knamp = 2; knamp <= num_namp; ++knamp) {
                    mystr = (mystr " " x_namep[knamp])
                  }
                } # end if (num_namp > 1)
                llname = length(mystr)
                cc = ""
                for (kc=llname-1; kc>1; --kc) {
                  cp = cc
                  cc = substr(mystr,kc,1)
                  if (cc == ".") {
                    if (cp != " ") {
                      mystr = (substr(mystr,kc+1,llname-kc) comma " " \
                        substr(mystr,1,kc))
                    } else {
                      mystr = (substr(mystr,kc+2,llname-kc-1) comma " " \
                        substr(mystr,1,kc))
                    } # if (cp != " ")
                    kc=0
                  } # end if (cc == ".")
                } # for (kc=llname-1; kc>1; --kc)
                mystr = (mystr nam_suf)
                ret_val=mystr
              } # end if (index(mystr,comma) != 0 )
              authors[ii]=ret_val}
            }
          }
          for (j=1; j<= num_auth; ++j ) {
            if ( (primary) && i == "1" ) printf (" primary '%s' \n", authors[j])
            else printf (" %3s '%s' \n", i-cit_decr, authors[j])
          }
        }
      }
      # everything has been printed; do not flush again
      flush_refs = 0
    }
  }
}
#
# Start of the REMARK rule.  It continues past the end of this span;
# the part below is reproduced with its code unchanged.
#
{
  if (first_field == "REMARK") {
    xlat_flag=xlat_save
    ++all_remarks
    # parse record
    remark_number = substr ( ($0), 8, 3)
    remark_cont = substr ( ($0),17, 2)
    jrnl_rec_type = substr ( ($0),13, 4)
    jrnl_refNum = substr ( ($0),22, 49)+0
    remark_text = substr ( ($0),12, 60)
    remark_cit_text = substr ( ($0),20, 51)
    if (convtext == "yes" && jrnl_rec_type == "TITL") {
      {#
      # apply PDB typesetting codes if any to a line
      #
      {
        lx_tl = length(remark_cit_text)
        tx_tl = remark_cit_text
        lostr = ""
        for (ix_tl = 1; ix_tl <= lx_tl; ++ix_tl) {
          cx_tl = substr(tx_tl,ix_tl,1)
          cx_tl = substr((lcaz cx_tl), \
            index((UCAZ cx_tl), cx_tl),1)
          lostr = (lostr cx_tl)
        }
      }
      lstr = length(lostr)
      mystr = ""
      pchar = " "
      for( qtsi=1; qtsi <= lstr; ++qtsi) {
        mychar = substr( lostr, qtsi, 1)
        if( pchar == " " || \
            pchar == "," || \
            pchar == "." || \
            pchar == "(" || \
            pchar == "*" || \
            pchar == "/" ) {
          mychar = substr((UCAZ mychar),\
            index((lcaz mychar), mychar),1)
        }
        if( ( mychar != "*" && mychar != "$" && mychar != "/" ) \
            || (mychar == pchar) ) mystr = (mystr mychar)
        if( pchar == "/" ){
          if( mychar == "$" || mychar == "-" ) {
            pchar = mychar
          }
        } else pchar = mychar
      }
      ret_val=mystr
      remark_cit_text=ret_val}
    }
    remark_test = substr ( ($0),12, 3)
    # Deal with change of remark number
    remark_test = substr ( ($0), 12, 3)
    if (remark_number_old != remark_number) {
      remark_flag = "0"
      remark_number_old = remark_number
      if (remark_number != " 3" && \
          remark_number != " 2" && \
          remark_number != " 1") {
        printf (";\n\n")
      }
    }
    #
    # As of the February 1996 PDB format, Remark 4 contains text
    # indicating the format with which the entry complies
    #
    if (remark_number == " 4"){
      if (substr($0, 17, 23) == \
          "COMPLIES WITH FORMAT V.") \
        compliance_level = substr($0, 41,3)
    }
    # type 1 remarks - additional references
    # data items identical to JRNL
    if (remark_number == " 1" && remark_test == "REF") {
      ++cit_flag
flush_refs = 1 cit_refNum[cit_flag] = jrnl_refNum } if (remark_number == " 1" && remark_test != "REF" \ && remark_test != " ") { # Assign TITL records if (jrnl_rec_type == "TITL" && remark_cont == " ") { cit_title_1[cit_flag] = remark_cit_text cit_title_2[cit_flag] = "" } if (jrnl_rec_type == "TITL" && remark_cont != " ") { if ( cit_title_2[cit_flag] == "" ) \ cit_title_2[cit_flag] = remark_cit_text else \ cit_title_2[cit_flag] = \ (cit_title_2[cit_flag] "\n " remark_cit_text ) } # Assign AUTH records if (jrnl_rec_type == "AUTH" && remark_cont == " ") { cit_auth_1[cit_flag] = remark_cit_text cit_auth_2[cit_flag] = "" } if (jrnl_rec_type == "AUTH" && remark_cont != " ") cit_auth_2[cit_flag] = \ (cit_auth_2[cit_flag] remark_cit_text ) # Assign EDIT records if (jrnl_rec_type == "EDIT" && remark_cont == " ") { cit_edit_1[cit_flag] = remark_cit_text cit_edit_2[cit_flag] = "" } if (jrnl_rec_type == "EDIT" && remark_cont != " ") cit_edit_2[cit_flag] = \ ( cit_edit_2[cit_flag] remark_cit_text ) # Assign REF records if (jrnl_rec_type == "REF " && remark_cont == " ") { jour_1[cit_flag] = substr ( ($0), 20, 28) jour_2[cit_flag] = "" volu[cit_flag] = substr ( ($0), 52, 4) page[cit_flag] = substr ( ($0), 57, 5) year[cit_flag] = substr ( ($0), 63, 4) jrnl_pub_pub_1[cit_flag] = "?" 
} if (jrnl_rec_type == "REF " && remark_cont != " ") { if ( jour_2[cit_flag] == "" ) \ jour_2[cit_flag] = substr ( ($0), 20, 28) else \ jour_2[cit_flag] = \ ( jour_2[cit_flag] "\n " substr ( ($0), 20, 28)) } # Assign PUBL records if (jrnl_rec_type == "PUBL" && remark_cont == " ") { jrnl_pub_pub_1[cit_flag] = substr ( ($0), 20, 51) jrnl_pub_pub_2[cit_flag] = "" } if (jrnl_rec_type == "PUBL" && remark_cont != " ") { if ( jrnl_pub_pub_2[cit_flag] == "" ) \ jrnl_pub_pub_2[cit_flag] = substr ( ($0), 20, 51) else \ jrnl_pub_pub_2[cit_flag] = \ ( jrnl_pub_pub_2[cit_flag] "\n " substr ( ($0), 20, 51)) } if (jrnl_rec_type == "REFN") { astm[cit_flag] = substr ( ($0), 25, 6) country[cit_flag] = substr ( ($0), 33, 2) issn_isbn[cit_flag] = substr ( ($0), 36, 4) if (issn_isbn[cit_flag] == " " && \ substr(jour_1[cit_flag],1,9) != "TO BE PUB") { if (jrnl_pub_pub_1[cit_flag] != "?") { issn_isbn[cit_flag] = "ISBN" } if (volu[cit_flag] == "" || \ volu[cit_flag] == "?" || \ volu[cit_flag] == " ") { issn_isbn[cit_flag] = "ISBN" } } if (issn_isbn[cit_flag] != "ISBN") { isbn[cit_flag] = "?" issn[cit_flag] = substr ( ($0), 41, 25) } else { issn[cit_flag] = "?" isbn[cit_flag] = substr ( ($0), 41, 25) } csd[cit_flag] = substr ( ($0), 67, 4) if (csd[cit_flag] == " ") csd[cit_flag] = "?" } ++remark_flag } # # type 2 remarks - resolution # if (remark_number == " 2" && $3 == "RESOLUTION.") { resolution = substr ( ($0), 23, 45) num_split = split(resolution,res_split," ") if ( res_split[1] != "NOT" ) { res_flag = 1 printf ("\n_reflns.entry_id %s \n", head_PDB_code ) printf ("_reflns.d_resolution_high %8.2g \n", res_split[1] ) } ++remark_flag # Include _exptl templates if (verbose == "yes") { printf ("_exptl.absorpt_coefficient_mu ? \n") printf ("_exptl.absorpt_correction_T_max ? \n") printf ("_exptl.absorpt_correction_type ? \n") printf ("_exptl.absorpt_process_details ? \n\n") printf ("_exptl_crystal.colour ? \n") printf ("_exptl_crystal.density_diffrn ? 
\n") printf ("_exptl_crystal.density_meas ? \n") printf ("_exptl_crystal.density_meas_temp ? \n") printf ("_exptl_crystal.density_method ? \n") printf ("_exptl_crystal.description ? \n") printf ("_exptl_crystal.F_000 ? \n") printf ("_exptl_crystal_face.diffr_chi ? \n") printf ("_exptl_crystal_face.diffr_kappa ? \n") printf ("_exptl_crystal_face.diffr_phi ? \n") printf ("_exptl_crystal_face.diffr_psi ? \n") printf ("_exptl_crystal_face.index_h ? \n") printf ("_exptl_crystal_face.index_k ? \n") printf ("_exptl_crystal_face.index_l ? \n") printf ("_exptl_crystal_face.perp_dist ? \n") printf ("_exptl_crystal.id ? \n") printf ("_exptl_crystal.preparation ? \n") printf ("_exptl_crystal.size_max ? \n") printf ("_exptl_crystal.size_mid ? \n") printf ("_exptl_crystal.size_min ? \n") printf ("_exptl_crystal.size_rad ? \n") printf ("_exptl.crystals_number ? \n") printf ("_exptl_crystal_grow.apparatus ? \n") printf ("_exptl_crystal_grow.atmosphere ? \n") printf ("_exptl_crystal_grow.crystal_id ? \n") printf ("_exptl_crystal_grow.details ? \n") printf ("_exptl_crystal_grow.method ? \n") printf ("_exptl_crystal_grow.method_ref ? \n") printf ("_exptl_crystal_grow.pH ? \n") printf ("_exptl_crystal_grow.pressure ? \n") printf ("_exptl_crystal_grow.seeding ? \n") printf ("_exptl_crystal_grow.seeding_ref ? \n") printf ("_exptl_crystal_grow.temp ? \n") printf ("_exptl_crystal_grow.time ? \n") printf ("\nloop_\n") printf ("_exptl_crystal_grow_comp.crystal_id \n") printf ("_exptl_crystal_grow_comp.id \n") printf ("_exptl_crystal_grow_comp.conc \n") printf ("_exptl_crystal_grow_comp.details \n") printf ("_exptl_crystal_grow_comp.name \n") printf ("_exptl_crystal_grow_comp.sol_id \n") printf ("_exptl_crystal_grow_comp.volume \n") printf (" ? ? ? ? ? ? ? \n\n") } # Include additional data items to be added on diffraction experiment. # A rigourous treatment of REMARK 3 might be able to parse some of # this info. if (verbose == "yes") { printf ("_diffrn.ambient_temp ? 
\n") printf ("_diffrn.ambient_pressure ? \n") printf ("_diffrn_attenuator.code ? \n") printf ("_diffrn_attenuator.scale ? \n") printf ("_diffrn.details ? \n\n") printf ("_diffrn.ambient_environment ? \n") printf ("_diffrn.crystal_support ? \n") printf ("_diffrn.crystal_treatment ? \n\n") printf ("_diffrn_measurement.method ? \n") printf ("_diffrn_measurement.details ? \n") printf ("_diffrn_measurement.device ? \n") printf ("_diffrn_measurement.device_details ? \n") printf ("_diffrn_measurement.device_type ? \n") printf ("_diffrn_orient_matrix.type ? \n") printf ("_diffrn_orient_matrix.UB[1][1] ? \n") printf ("_diffrn_orient_matrix.UB[1][2] ? \n") printf ("_diffrn_orient_matrix.UB[1][3] ? \n") printf ("_diffrn_orient_matrix.UB[2][1] ? \n") printf ("_diffrn_orient_matrix.UB[2][2] ? \n") printf ("_diffrn_orient_matrix.UB[2][3] ? \n") printf ("_diffrn_orient_matrix.UB[3][1] ? \n") printf ("_diffrn_orient_matrix.UB[3][2] ? \n") printf ("_diffrn_orient_matrix.UB[3][3] ? \n\n") printf ("loop_\n") printf ("_diffrn_orient_refln.index_h\n") printf ("_diffrn_orient_refln.index_k\n") printf ("_diffrn_orient_refln.index_l\n") printf ("_diffrn_orient_refln.angle_chi\n") printf ("_diffrn_orient_refln.angle_kappa\n") printf ("_diffrn_orient_refln.angle_phi\n") printf ("_diffrn_orient_refln.angle_psi\n") printf (" ? ? ? ? ? ? ?\n\n") printf ("_diffrn_radiation.filter_edge ? \n") printf ("_diffrn_radiation.inhomogeneity ? \n") printf ("_diffrn_radiation.monochromator ? \n") printf ("_diffrn_radiation.polarisn_norm ? \n") printf ("_diffrn_radiation.polarisn_ratio ? \n") printf ("_diffrn_radiation.collimation ? \n") printf ("_diffrn_radiation.type ? \n\n") printf ("loop_\n") printf ("_diffrn_radiation_wavelength.id \n") printf ("_diffrn_radiation_wavelength.wavelength \n") printf ("_diffrn_radiation_wavelength.wt \n") printf (" ? ? ? \n\n") printf ("_diffrn_detector.detector ? \n") printf ("_diffrn_detector.dtime ? \n") printf ("_diffrn_detector.details ? 
\n") printf ("_diffrn_detector.type ? \n\n") printf ("_diffrn_source.source ? \n") printf ("_diffrn_source.current ? \n") printf ("_diffrn_source.details ? \n") printf ("_diffrn_source.diffrn_id ? \n") printf ("_diffrn_source.power ? \n") printf ("_diffrn_source.size ? \n") printf ("_diffrn_source.target ? \n") printf ("_diffrn_source.type ? \n\n") printf ("_diffrn_source.voltage ? \n\n") printf ("loop_\n") printf ("_diffrn_refln.index_h \n") printf ("_diffrn_refln.index_k \n") printf ("_diffrn_refln.index_l \n") printf ("_diffrn_refln.angle_chi \n") printf ("_diffrn_refln.angle_kappa \n") printf ("_diffrn_refln.angle_omega \n") printf ("_diffrn_refln.angle_phi \n") printf ("_diffrn_refln.angle_psi \n") printf ("_diffrn_refln.angle_theta \n") printf ("_diffrn_refln.attenuator_code \n") printf ("_diffrn_refln.counts_bg_1 \n") printf ("_diffrn_refln.counts_bg_2 \n") printf ("_diffrn_refln.counts_net \n") printf ("_diffrn_refln.counts_peak \n") printf ("_diffrn_refln.counts_total \n") printf ("_diffrn_refln.detect_slit_horiz \n") printf ("_diffrn_refln.detect_slit_vert \n") printf ("_diffrn_refln.diffrn_id \n") printf ("_diffrn_refln.elapsed_time \n") printf ("_diffrn_refln.intensity_net \n") printf ("_diffrn_refln.intensity_sigma \n") printf ("_diffrn_refln.scale_group_code \n") printf ("_diffrn_refln.scan_mode \n") printf ("_diffrn_refln.scan_mode_backgd \n") printf ("_diffrn_refln.scan_width \n") printf ("_diffrn_refln.sint_over_lambda \n") printf ("_diffrn_refln.standard_code\n") printf ("_diffrn_refln.wavelength \n") printf ("_diffrn_refln.wavelength_id \n") printf \ (" ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\n\n") printf ("_diffrn_reflns.av_R_equivalents ? \n") printf ("_diffrn_reflns.av_sigmaI_over_netI ? \n") printf ("_diffrn_reflns.limit_h_max ? \n") printf ("_diffrn_reflns.limit_h_min ? \n") printf ("_diffrn_reflns.limit_k_max ? \n") printf ("_diffrn_reflns.limit_k_min ? \n") printf ("_diffrn_reflns.limit_l_max ? 
\n") printf ("_diffrn_reflns.limit_l_min ? \n") printf ("_diffrn_reflns.number ? \n") printf ("_diffrn_reflns.reduction_process ? \n") printf ("_diffrn_reflns.theta_max ? \n") printf ("_diffrn_reflns.theta_min ? \n") printf ("_diffrn_reflns.transf_matrix[1][1] ? \n") printf ("_diffrn_reflns