Cambios
En el instante 21 de octubre de 2025, 9:00:41 UTC,
-
Añadido recurso Improved training of deep convolutional networks via minimum-variance regularized adaptive sampling a Improved training of deep convolutional networks via minimum-variance regularized adaptive sampling
| f | 1 | { | f | 1 | { |
| 2 | "author": "A Rojas-Dominguez, SI Valdez, M Ornelas-Rodriguez, M | 2 | "author": "A Rojas-Dominguez, SI Valdez, M Ornelas-Rodriguez, M | ||
| 3 | Carpio", | 3 | Carpio", | ||
| 4 | "author_email": null, | 4 | "author_email": null, | ||
| 5 | "creator_user_id": "a3da3ec9-3fd4-47a4-8d04-0a90b09614e0", | 5 | "creator_user_id": "a3da3ec9-3fd4-47a4-8d04-0a90b09614e0", | ||
| 6 | "extras": [ | 6 | "extras": [ | ||
| 7 | { | 7 | { | ||
| 8 | "key": "A\u00f1o", | 8 | "key": "A\u00f1o", | ||
| 9 | "value": "2023" | 9 | "value": "2023" | ||
| 10 | }, | 10 | }, | ||
| 11 | { | 11 | { | ||
| 12 | "key": "Google Scholar URL", | 12 | "key": "Google Scholar URL", | ||
| 13 | "value": | 13 | "value": | ||
| 14 | gesize=100&sortby=pubdate&citation_for_view=MG1jyREAAAAJ:VLnqNzywnoUC" | 14 | gesize=100&sortby=pubdate&citation_for_view=MG1jyREAAAAJ:VLnqNzywnoUC" | ||
| 15 | }, | 15 | }, | ||
| 16 | { | 16 | { | ||
| 17 | "key": "Identificador hash", | 17 | "key": "Identificador hash", | ||
| 18 | "value": "1a979632b846" | 18 | "value": "1a979632b846" | ||
| 19 | }, | 19 | }, | ||
| 20 | { | 20 | { | ||
| 21 | "key": "Lugar de publicaci\u00f3n", | 21 | "key": "Lugar de publicaci\u00f3n", | ||
| 22 | "value": "Soft Computing 27 (18), 13237-13253, 2023" | 22 | "value": "Soft Computing 27 (18), 13237-13253, 2023" | ||
| 23 | }, | 23 | }, | ||
| 24 | { | 24 | { | ||
| 25 | "key": "Tipo", | 25 | "key": "Tipo", | ||
| 26 | "value": "Publicaci\u00f3n" | 26 | "value": "Publicaci\u00f3n" | ||
| 27 | }, | 27 | }, | ||
| 28 | { | 28 | { | ||
| 29 | "key": "Tipo de publicaci\u00f3n", | 29 | "key": "Tipo de publicaci\u00f3n", | ||
| 30 | "value": "Otro" | 30 | "value": "Otro" | ||
| 31 | }, | 31 | }, | ||
| 32 | { | 32 | { | ||
| 33 | "key": "URL directo", | 33 | "key": "URL directo", | ||
| 34 | "value": | 34 | "value": | ||
| 35 | "https://www.researchsquare.com/article/rs-983472/latest.pdf" | 35 | "https://www.researchsquare.com/article/rs-983472/latest.pdf" | ||
| 36 | } | 36 | } | ||
| 37 | ], | 37 | ], | ||
| 38 | "groups": [ | 38 | "groups": [ | ||
| 39 | { | 39 | { | ||
| 40 | "description": "", | 40 | "description": "", | ||
| 41 | "display_name": "Publicaciones", | 41 | "display_name": "Publicaciones", | ||
| 42 | "id": "8be672a5-4640-455e-a4f3-46b52b66c07b", | 42 | "id": "8be672a5-4640-455e-a4f3-46b52b66c07b", | ||
| 43 | "image_display_url": "", | 43 | "image_display_url": "", | ||
| 44 | "name": "publicaciones", | 44 | "name": "publicaciones", | ||
| 45 | "title": "Publicaciones" | 45 | "title": "Publicaciones" | ||
| 46 | } | 46 | } | ||
| 47 | ], | 47 | ], | ||
| 48 | "id": "b600fea9-49c5-4c0b-aae4-7bcc05d0b6a7", | 48 | "id": "b600fea9-49c5-4c0b-aae4-7bcc05d0b6a7", | ||
| 49 | "isopen": false, | 49 | "isopen": false, | ||
| 50 | "license_id": null, | 50 | "license_id": null, | ||
| 51 | "license_title": null, | 51 | "license_title": null, | ||
| 52 | "maintainer": null, | 52 | "maintainer": null, | ||
| 53 | "maintainer_email": null, | 53 | "maintainer_email": null, | ||
| 54 | "metadata_created": "2025-10-21T09:00:41.059321", | 54 | "metadata_created": "2025-10-21T09:00:41.059321", | ||
| n | 55 | "metadata_modified": "2025-10-21T09:00:41.059330", | n | 55 | "metadata_modified": "2025-10-21T09:00:41.593623", |
| 56 | "name": "1a979632b846", | 56 | "name": "1a979632b846", | ||
| 57 | "notes": "Fostered by technological and theoretical developments, | 57 | "notes": "Fostered by technological and theoretical developments, | ||
| 58 | deep neural networks (DNNs) have achieved great success in many | 58 | deep neural networks (DNNs) have achieved great success in many | ||
| 59 | applications, but their training via mini-batch stochastic gradient | 59 | applications, but their training via mini-batch stochastic gradient | ||
| 60 | descent (SGD) can be very costly due to the possibly tens of millions | 60 | descent (SGD) can be very costly due to the possibly tens of millions | ||
| 61 | of parameters to be optimized and the large amounts of training | 61 | of parameters to be optimized and the large amounts of training | ||
| 62 | examples that must be processed. The computational cost is exacerbated | 62 | examples that must be processed. The computational cost is exacerbated | ||
| 63 | by the inefficiency of the uniform sampling typically used by SGD to | 63 | by the inefficiency of the uniform sampling typically used by SGD to | ||
| 64 | form the training mini-batches: since not all training examples are | 64 | form the training mini-batches: since not all training examples are | ||
| 65 | equally relevant for training, sampling these under a uniform | 65 | equally relevant for training, sampling these under a uniform | ||
| 66 | distribution is far from optimal, making the case for the study of | 66 | distribution is far from optimal, making the case for the study of | ||
| 67 | improved methods to train DNNs. A better strategy is to sample the | 67 | improved methods to train DNNs. A better strategy is to sample the | ||
| 68 | training instances under a distribution where the probability of being | 68 | training instances under a distribution where the probability of being | ||
| 69 | selected is proportional to the relevance of each individual instance; | 69 | selected is proportional to the relevance of each individual instance; | ||
| 70 | one way to achieve this is through importance sampling (IS), which | 70 | one way to achieve this is through importance sampling (IS), which | ||
| 71 | minimizes the gradients\u2019 variance w.r.t. the network parameters, | 71 | minimizes the gradients\u2019 variance w.r.t. the network parameters, | ||
| 72 | consequently improving convergence. In this paper, an IS-based | 72 | consequently improving convergence. In this paper, an IS-based | ||
| 73 | adaptive sampling method to improve the training of DNNs is | 73 | adaptive sampling method to improve the training of DNNs is | ||
| 74 | introduced. This method exploits side information to construct the | 74 | introduced. This method exploits side information to construct the | ||
| 75 | optimal sampling distribution and is dubbed regularized adaptive | 75 | optimal sampling distribution and is dubbed regularized adaptive | ||
| 76 | sampling (RAS). Experimental comparison using deep convolutional | 76 | sampling (RAS). Experimental comparison using deep convolutional | ||
| 77 | networks for classification of the MNIST and CIFAR-10 datasets shows | 77 | networks for classification of the MNIST and CIFAR-10 datasets shows | ||
| 78 | that when compared against SGD and against another sampling method in | 78 | that when compared against SGD and against another sampling method in | ||
| 79 | the state of the art, RAS produces improvements in the speed and | 79 | the state of the art, RAS produces improvements in the speed and | ||
| 80 | variance of the training process without incurring significant | 80 | variance of the training process without incurring significant | ||
| 81 | overhead or affecting the classification.", | 81 | overhead or affecting the classification.", | ||
| n | 82 | "num_resources": 0, | n | 82 | "num_resources": 1, |
| 83 | "num_tags": 3, | 83 | "num_tags": 3, | ||
| 84 | "organization": { | 84 | "organization": { | ||
| 85 | "approval_status": "approved", | 85 | "approval_status": "approved", | ||
| 86 | "created": "2022-05-19T00:10:30.480393", | 86 | "created": "2022-05-19T00:10:30.480393", | ||
| 87 | "description": "Observatorio Metropolitano CentroGeo", | 87 | "description": "Observatorio Metropolitano CentroGeo", | ||
| 88 | "id": "b3b3a79d-748a-4464-9471-732b6c74ec53", | 88 | "id": "b3b3a79d-748a-4464-9471-732b6c74ec53", | ||
| 89 | "image_url": | 89 | "image_url": | ||
| 90 | "2022-05-19-001030.456616FullColor1280x1024LogoOnly.png", | 90 | "2022-05-19-001030.456616FullColor1280x1024LogoOnly.png", | ||
| 91 | "is_organization": true, | 91 | "is_organization": true, | ||
| 92 | "name": "observatorio-metropolitano-centrogeo", | 92 | "name": "observatorio-metropolitano-centrogeo", | ||
| 93 | "state": "active", | 93 | "state": "active", | ||
| 94 | "title": "Observatorio Metropolitano CentroGeo", | 94 | "title": "Observatorio Metropolitano CentroGeo", | ||
| 95 | "type": "organization" | 95 | "type": "organization" | ||
| 96 | }, | 96 | }, | ||
| 97 | "owner_org": "b3b3a79d-748a-4464-9471-732b6c74ec53", | 97 | "owner_org": "b3b3a79d-748a-4464-9471-732b6c74ec53", | ||
| 98 | "private": false, | 98 | "private": false, | ||
| 99 | "relationships_as_object": [], | 99 | "relationships_as_object": [], | ||
| 100 | "relationships_as_subject": [], | 100 | "relationships_as_subject": [], | ||
| t | 101 | "resources": [], | t | 101 | "resources": [ |
| 102 | { | ||||
| 103 | "cache_last_updated": null, | ||||
| 104 | "cache_url": null, | ||||
| 105 | "created": "2025-10-21T09:00:41.607684", | ||||
| 106 | "datastore_active": false, | ||||
| 107 | "description": "Fostered by technological and theoretical | ||||
| 108 | developments, deep neural networks (DNNs) have achieved great success | ||||
| 109 | in many applications, but their training via mini-batch stochastic | ||||
| 110 | gradient descent (SGD) can be very costly due to the possibly tens of | ||||
| 111 | millions of parameters to be optimized and the large amounts of | ||||
| 112 | training examples that must be processed. The computational cost is | ||||
| 113 | exacerbated by the inefficiency of the uniform sampling typically used | ||||
| 114 | by SGD to form the training mini-batches: since not all training | ||||
| 115 | examples are equally relevant for training, sampling these under a | ||||
| 116 | uniform distribution is far from optimal, making the case for the | ||||
| 117 | study of improved methods to train DNNs. A better strategy is to | ||||
| 118 | sample the training instances under a distribution where the | ||||
| 119 | probability of being selected is proportional to the relevance of each | ||||
| 120 | individual instance; one way to achieve this is through importance | ||||
| 121 | sampling (IS), which minimizes the gradients\u2019 variance w.r.t. the | ||||
| 122 | network parameters, consequently improving convergence. In this paper, | ||||
| 123 | an IS-based adaptive sampling method to improve the training of DNNs | ||||
| 124 | is introduced. This method exploits side information to construct the | ||||
| 125 | optimal sampling distribution and is dubbed regularized adaptive | ||||
| 126 | sampling (RAS). Experimental comparison using deep convolutional | ||||
| 127 | networks for classification of the MNIST and CIFAR-10 datasets shows | ||||
| 128 | that when compared against SGD and against another sampling method in | ||||
| 129 | the state of the art, RAS produces improvements in the speed and | ||||
| 130 | variance of the training process without incurring significant | ||||
| 131 | overhead or affecting the classification.", | ||||
| 132 | "format": "HTML", | ||||
| 133 | "hash": "", | ||||
| 134 | "id": "33af2ee1-5bdc-4702-b301-d99ece29277d", | ||||
| 135 | "last_modified": null, | ||||
| 136 | "metadata_modified": "2025-10-21T09:00:41.597445", | ||||
| 137 | "mimetype": null, | ||||
| 138 | "mimetype_inner": null, | ||||
| 139 | "name": "Improved training of deep convolutional networks via | ||||
| 140 | minimum-variance regularized adaptive sampling", | ||||
| 141 | "package_id": "b600fea9-49c5-4c0b-aae4-7bcc05d0b6a7", | ||||
| 142 | "position": 0, | ||||
| 143 | "resource_type": null, | ||||
| 144 | "size": null, | ||||
| 145 | "state": "active", | ||||
| 146 | "url": | ||||
| 147 | esize=100&sortby=pubdate&citation_for_view=MG1jyREAAAAJ:VLnqNzywnoUC", | ||||
| 148 | "url_type": null | ||||
| 149 | } | ||||
| 150 | ], | ||||
| 102 | "state": "active", | 151 | "state": "active", | ||
| 103 | "tags": [ | 152 | "tags": [ | ||
| 104 | { | 153 | { | ||
| 105 | "display_name": "2023", | 154 | "display_name": "2023", | ||
| 106 | "id": "197c32f9-e42e-4e22-b265-0a55651ced0a", | 155 | "id": "197c32f9-e42e-4e22-b265-0a55651ced0a", | ||
| 107 | "name": "2023", | 156 | "name": "2023", | ||
| 108 | "state": "active", | 157 | "state": "active", | ||
| 109 | "vocabulary_id": null | 158 | "vocabulary_id": null | ||
| 110 | }, | 159 | }, | ||
| 111 | { | 160 | { | ||
| 112 | "display_name": "computer-science", | 161 | "display_name": "computer-science", | ||
| 113 | "id": "29cae056-cd7e-43f7-be5b-b25869a3fbf2", | 162 | "id": "29cae056-cd7e-43f7-be5b-b25869a3fbf2", | ||
| 114 | "name": "computer-science", | 163 | "name": "computer-science", | ||
| 115 | "state": "active", | 164 | "state": "active", | ||
| 116 | "vocabulary_id": null | 165 | "vocabulary_id": null | ||
| 117 | }, | 166 | }, | ||
| 118 | { | 167 | { | ||
| 119 | "display_name": "svaldez", | 168 | "display_name": "svaldez", | ||
| 120 | "id": "f42a8210-1cef-4f03-98a0-a6d3d0d4f848", | 169 | "id": "f42a8210-1cef-4f03-98a0-a6d3d0d4f848", | ||
| 121 | "name": "svaldez", | 170 | "name": "svaldez", | ||
| 122 | "state": "active", | 171 | "state": "active", | ||
| 123 | "vocabulary_id": null | 172 | "vocabulary_id": null | ||
| 124 | } | 173 | } | ||
| 125 | ], | 174 | ], | ||
| 126 | "title": "Improved training of deep convolutional networks via | 175 | "title": "Improved training of deep convolutional networks via | ||
| 127 | minimum-variance regularized adaptive sampling", | 176 | minimum-variance regularized adaptive sampling", | ||
| 128 | "type": "dataset", | 177 | "type": "dataset", | ||
| 129 | "url": | 178 | "url": | ||
| 130 | esize=100&sortby=pubdate&citation_for_view=MG1jyREAAAAJ:VLnqNzywnoUC", | 179 | esize=100&sortby=pubdate&citation_for_view=MG1jyREAAAAJ:VLnqNzywnoUC", | ||
| 131 | "version": null | 180 | "version": null | ||
| 132 | } | 181 | } |
